Esempio n. 1
0
def test_plot_graph_measure_hists(long, nan):
    """Test plotting histograms from metric dataframe.

    The first seven columns of the example metrics CSV are renamed with an
    ``_auc`` suffix and passed as the measures to plot.

    :param long: when truthy, the example rows are repeated (32 copies in
        total) to exercise a longer dataframe.
    :param nan: when truthy, the seven measure columns are filled with NaN
        before plotting.
    """
    import pandas as pd

    base_dir = str(Path(__file__).parent / "examples")

    df_csv = (
        f"{base_dir}/miscellaneous/0021001_modality-dwi_nodetype-parc_est-csa_thrtype-PROP_th"
        f"r-0.9_net_mets.csv")

    # Hack the dataframe
    df = pd.read_csv(df_csv)
    if long:
        # DataFrame.append was removed in pandas 2.0; concat gives the same
        # 32-copy frame without re-reading the CSV on every iteration.
        df = pd.concat([df] * 32, ignore_index=True)

    # Rename by assigning a fresh label list instead of mutating
    # ``df.columns.values`` in place (the Index is meant to be immutable).
    measures = [f"{name}_auc" for name in df.columns[:7]]
    df.columns = measures + list(df.columns[7:])

    if nan:
        # Assign through the column labels: indexing with the bare integer
        # position would add new columns named 0..6 and leave the measures
        # untouched.
        df[measures] = np.nan

    # The context manager removes the directory even if plotting raises.
    with tempfile.TemporaryDirectory() as dir_name:
        # This is to make sure the file gets written to the temp dir
        os.makedirs(f"{dir_name}/dir_a/dir_b/dir_c")

        # This file isn't actually used, the output is written a few dirs up.
        net_pick_file = f"{dir_name}/dir_a/dir_b/dir_c/netstats.pkl"

        plot_gen.plot_graph_measure_hists(df, measures, net_pick_file)
Esempio n. 2
0
def test_plot_graph_measure_hists(nan):
    """Test plotting histograms from metric dataframe.

    :param nan: when truthy, the fifth column of the example CSV is replaced
        with NaN and the modified copy (written to a temporary directory) is
        plotted instead of the original file.
    """
    import pandas as pd

    base_dir = str(Path(__file__).parent / "examples")
    df_csv = f"{base_dir}/miscellaneous/metrics_sub-OAS31172_ses-d0407_topology_auc_clean.csv"

    # The context manager guarantees the directory is removed even when the
    # plot call or the assertion below fails.
    with tempfile.TemporaryDirectory() as dir_name:
        if nan:
            # Hack the dataframe
            df = pd.read_csv(df_csv)
            df[df.columns[4]] = np.nan
            nan_csv = f"{dir_name}/TEST.csv"
            df.to_csv(nan_csv, index=False)
            fig = plot_gen.plot_graph_measure_hists(nan_csv)
        else:
            fig = plot_gen.plot_graph_measure_hists(df_csv)
        assert fig is not None
Esempio n. 3
0
def collect_pandas_df_make(net_pickle_mt_list, ID, network, plot_switch):
    """
    Average pickled graph-measure dataframes and save the summary.

    When more than one existing file is given, each pickle is loaded, columns
    whose label is an integer or contains a digit are dropped, and the rows
    are stacked. The column-wise mean and standard deviation of every
    remaining column except ``id`` are written two directory levels above the
    first listed file, once as a pickle and once as ``.csv``. With zero or
    one file only a message is printed.

    :param net_pickle_mt_list: iterable of paths to pickled dataframes. The
        first path must contain ``net_mets_``; the text between that marker
        and the first ``_0.`` names the output file.
    :param ID: identifier used in the output file name and in log messages.
    :param network: optional name inserted into the output file name; it is
        omitted from the name when falsy.
    :param plot_switch: when ``True``, histograms of the stacked measures are
        drawn with ``pynets.plotting.plot_gen.plot_graph_measure_hists``.
    :return: None
    :raises UserWarning: if any listed file does not exist.
    """
    import pandas as pd
    from itertools import chain

    # Materialise once so a one-shot iterator is not exhausted by the
    # existence filter before its length is compared.
    expected_files = list(net_pickle_mt_list)

    # Condense the list to the result files that were actually produced.
    net_pickle_mt_list = [path for path in expected_files if op.isfile(path)]

    if len(expected_files) > len(net_pickle_mt_list):
        raise UserWarning('Warning! Number of actual models produced less than expected. Some graphs were excluded')

    if len(net_pickle_mt_list) <= 1:
        if network is not None:
            print(f"\nSingle dataframe for the {network} network for: {ID}\n")
        else:
            print(f"\nSingle dataframe for: {ID}\n")
        return

    print(f"\n\nList of result files to concatenate:\n{net_pickle_mt_list}\n\n")

    # Output location and name come from the first file as listed by the
    # caller, i.e. before the list is sorted.
    first_file = net_pickle_mt_list[0]
    subject_path = op.dirname(op.dirname(first_file))
    model_tag = first_file.split('_0.')[0].split('net_mets_')[1]
    name_of_network_pickle = f"net_mets_{model_tag}"
    net_pickle_mt_list.sort()

    frames = []
    models = []
    for file_ in net_pickle_mt_list:
        # NOTE(security): unpickling can execute arbitrary code; only pass
        # files produced by a trusted pipeline run.
        df = pd.read_pickle(file_)
        try:
            # str() lets non-string, non-integer labels be checked without
            # raising on iteration.
            node_cols = [col for col in df.columns
                         if isinstance(col, int) or any(ch.isdigit() for ch in str(col))]
            df = df.drop(columns=node_cols)
        except RuntimeError:
            print('Error: Node column removal failed for mean stats file...')
        # Recorded outside the try so models always lines up with frames.
        models.append(op.basename(file_))
        frames.append(df)

    try:
        # Concatenate and find mean across dataframes
        rows = chain.from_iterable(frame.T.to_dict().values() for frame in frames)
        df_concat = pd.DataFrame(list(rows))
        df_concat["Model"] = [model.replace('_net_mets', '') for model in models]

        # Filtering (rather than list.remove) tolerates a missing 'id' column.
        measures = [col for col in df_concat.columns if col not in ('id', 'Model')]

        if plot_switch is True:
            # Plotting dependencies are only needed on this path; the backend
            # is selected before the plotting module is imported.
            import matplotlib
            matplotlib.use('Agg')
            from pynets.plotting import plot_gen
            # The last file in sorted order is passed as the reference path.
            plot_gen.plot_graph_measure_hists(df_concat, measures, net_pickle_mt_list[-1])

        stats = df_concat.loc[:, measures]
        df_concatted = stats.mean().to_frame().transpose()
        df_concatted_std = stats.std().to_frame().transpose()
        df_concatted.columns = [f"{col}_mean" for col in df_concatted.columns]
        df_concatted_std.columns = [f"{col}_std_dev" for col in df_concatted_std.columns]
        result = pd.concat([df_concatted, df_concatted_std], axis=1)
        df_concatted_final = result.reindex(sorted(result.columns), axis=1)
        print('\nConcatenating dataframes for ' + str(ID) + '...\n')

        suffix = f"_{network}_mean" if network else "_mean"
        net_pick_out_path = f"{subject_path}/{ID}_{name_of_network_pickle}{suffix}"
        df_concatted_final.to_pickle(net_pick_out_path)
        df_concatted_final.to_csv(f"{net_pick_out_path}.csv", index=False)

    except RuntimeWarning:
        # Only reachable when warnings are escalated to exceptions.
        print(f"\nWARNING: DATAFRAME CONCATENATION FAILED FOR {ID}!\n")

    return