Esempio n. 1
0
def permute_table(db, latex_f = False):
    """
    Print the permutation-test table and optionally return it as LaTeX.

    The results of ``permute`` for the 'reddit_5K' and 'protein_data'
    datasets are each passed through ``df_format`` and placed side by side.
    The dotted column labels are then split on '.' to form a MultiIndex.

    :param db: database handle forwarded unchanged to ``permute``
    :param latex_f: when truthy, return the table rendered by ``to_latex``
    :return: the LaTeX string if ``latex_f`` is truthy, otherwise ``None``
    """
    dataset_names = ('reddit_5K', 'protein_data')

    # Run every permutation query first, then format, keeping the call order
    # permute -> permute -> df_format -> df_format.
    raw_tables = [permute(db, name) for name in dataset_names]
    formatted = [df_format(raw) for raw in raw_tables]

    table = pd.concat(formatted, axis=1)
    split_labels = [tuple(label.split('.')) for label in table.columns]
    table.columns = pd.MultiIndex.from_tuples(split_labels)

    print(table)
    if not latex_f:
        return None
    return table.to_latex()
Esempio n. 2
0
def examine_homology(db, latex_f=False, stoa=None):
    """
    Summarise the best result per graph, with and without ``epd``.

    Runs are filtered with the module-level ``PERMUTE``, ``FLIP``,
    ``ID_THRESHOLD`` and ``FEAT`` settings, and runs whose ``fil`` is
    "random" are excluded. The maximum ``result`` for every (epd, graph)
    pair is pivoted into a table with one row per ``epd`` value and one
    column per graph. The 'dhfr' and 'reddit_12K' columns are dropped when
    present, and the reference scores in ``stoa`` are appended as one
    additional row.

    :param db: database handle; ``db.runs`` is passed to ``sacred_to_df``
    :param latex_f: when truthy, also print the table as LaTeX with a caption
    :param stoa: optional dict mapping graph name -> reference score; keys
        that are not columns of the table are ignored. ``None`` (the default)
        is treated as an empty dict.
    :return: the summary DataFrame (index reset to 0..n-1 by the append)
    """
    stoa = {} if stoa is None else stoa

    query = f'permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and feat=="{FEAT}" and fil!="random"'
    runs = sacred_to_df(db.runs).query(query)

    # Best result for every (epd, graph) combination.
    best = runs.groupby(['epd', 'graph'], as_index=False)['result'].max()
    df = df_format(best.pivot(index='epd', columns='graph', values='result'))

    # DataFrame.drop returns a new frame, so the result must be kept.
    # errors='ignore' tolerates graphs that are absent from the query result.
    df = df.drop(columns=['dhfr', 'reddit_12K'], errors='ignore')

    # Append the reference scores as one extra row. Columns without a
    # reference score become NaN; an empty ``stoa`` still yields an all-NaN
    # row, as DataFrame.append did. pd.concat replaces DataFrame.append,
    # which was removed in pandas 2.0.
    stoa_row = {name: score for name, score in stoa.items() if name in df.columns}
    df = pd.concat(
        [df, pd.DataFrame([stoa_row], columns=df.columns)],
        ignore_index=True,
    )

    caption = 'Here is the summary of whether to add 1 homolgy for different graphs' + query
    print(caption)
    print(df)
    print()
    if latex_f:
        # Insert the caption right after the first line of the LaTeX output.
        print(df.to_latex(longtable=False).replace('\n', '\n\\caption{' + caption + '}\\\\\n', 1))
    return df
Esempio n. 3
0
def examine_one_feat(db, feat ='sw', latex_f = False):
    """
    Print the best result of one featurization per graph and filtration.

    Runs are filtered with the module-level ``PERMUTE``, ``FLIP`` and
    ``ID_THRESHOLD`` settings and the requested ``feat``. The maximum
    ``result`` of every (graph, fil) pair is laid out with graphs as rows
    and filtrations as columns, then passed through ``df_format``.

    :param db: database handle; ``db.runs`` is passed to ``sacred_to_df``
    :param feat: value the ``feat`` column must equal
    :param latex_f: when truthy, also print the table as LaTeX
    :return: None; the table is only printed
    """
    query = f'permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and feat=="{feat}"'
    runs = sacred_to_df(db.runs).query(query)

    # Best result for every (graph, filtration) combination.
    best = runs.groupby(['graph', 'fil'], as_index=False)['result'].max()
    table = df_format(best.pivot(index='graph', columns='fil', values='result'))

    print('Here is the summary for %s featuralization\n'%feat)
    print(table)
    print()
    if latex_f:
        print(table.to_latex())
Esempio n. 4
0
def examine_one_graph(db, graph='mutag', latex_f=False, n_cv=1, print_flag=False,
                      include_permuted_kernels=False):
    """
    Summarise the best result of every featurization/filtration on one graph.

    Several selections of the runs are stacked, some relabelled in the
    ``feat`` column so they show up as separate rows:

    * all runs matching the module-level ``PERMUTE``/``FLIP``/``ID_THRESHOLD``
      settings with ``ntda != True``
    * ``sw`` runs with ``permute == True``, relabelled 'sw_p'
    * ``pf`` runs with ``permute == False`` (label stays 'pf')
    * ``pervec`` runs with ``ntda == True``, relabelled 'filvec'

    The maximum ``result`` per (feat, fil) is pivoted into a feat x fil
    table. The 'random' filtration column is dropped if present, the table
    goes through ``df_format``, and a row-wise 'mean' column rounded to two
    decimals is added.

    :param db: database handle; ``db.runs`` is passed to ``sacred_to_df``
    :param graph: value the ``graph`` column must equal
    :param latex_f: when truthy, also print the table as LaTeX with a caption
    :param n_cv: value the ``n_cv`` column must equal (main selections only)
    :param print_flag: when truthy, print the stacked runs before aggregating
    :param include_permuted_kernels: when truthy, also add the permuted
        ``pss`` and ``wg`` runs (relabelled 'pss_p' / 'wg_p') and switch to
        the wider feat whitelist, which does not contain 'filvec'. This
        replaces the former hard-coded ``if False:`` switch.
    :return: the summary DataFrame
    """
    # Load the runs once and reuse them for every selection.
    # NOTE(review): assumes sacred_to_df is a plain read of db.runs with no
    # side effects; confirm before relying on this.
    runs = sacred_to_df(db.runs)

    # (query, new feat label or None to keep the stored label)
    selections = [
        (f'n_cv=={n_cv} and permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and ntda!=True', None),
        # sw with permutation as True
        (f'n_cv=={n_cv} and permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="sw" and ntda!=True', 'sw_p'),
        # pf with permutation as False
        (f'n_cv=={n_cv} and permute == False and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pf" and ntda!=True', 'pf'),
        # filvec
        (f'n_cv=={n_cv} and permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pervec" and ntda==True', 'filvec'),
    ]
    allowed_feats = ['pervec', 'sw', 'sw_p', 'filvec', 'pf']

    if include_permuted_kernels:
        selections += [
            (f'permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pss"', 'pss_p'),
            (f'permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="wg"', 'wg_p'),
        ]
        allowed_feats = ['pervec', 'sw', 'sw_p', 'pss', 'pss_p', 'wg', 'wg_p', 'pf']

    frames = []
    for query, label in selections:
        subset = runs.query(query)
        if label is not None:
            # assign() returns a relabelled copy, so the shared ``runs``
            # frame is never written to.
            subset = subset.assign(feat=label)
        frames.append(subset)
    # The caption reports the last query that was run.
    last_query = selections[-1][0]

    df = pd.concat(frames)
    df = df[df.feat.isin(allowed_feats)]
    if print_flag:
        print(df.to_string())

    # Best result for every (feat, filtration) combination.
    best = df.groupby(['feat', 'fil'], as_index=False)['result'].max()
    df = best.pivot(index='feat', columns='fil', values='result')
    df = df.drop(columns=['random'], errors='ignore')

    df = df_format(df)
    df['mean'] = df.mean(axis=1)  # add mean column
    # DataFrame.round returns a new frame, so the result must be kept.
    df = df.round({'mean': 2})

    caption = f'Here is the summary of different feats for graph {graph} with n_cv {n_cv}\n' + last_query
    print(caption)
    print(df)
    print('-'*150)
    if latex_f:
        # Insert the caption right after the first line of the LaTeX output.
        print(df.to_latex(longtable=False).replace('\n', '\n\\caption{' + caption + '}\\\\\n', 1))
    return df