def permute_table(db, latex_f = False):
    """
    Build and print the permutation-test summary table.

    ``permute`` is run for the 'reddit_5K' and 'protein_data' datasets, each
    result is passed through ``df_format``, and the two formatted tables are
    placed side by side. Dotted column names are split on '.' into the levels
    of a column ``MultiIndex``.

    :param db: handle forwarded unchanged to ``permute``
    :param latex_f: when truthy, return the table rendered with ``to_latex``
    :return: the LaTeX source when ``latex_f`` is truthy, otherwise ``None``
    """
    # Run both permutation queries first, then format, as the original did.
    raw_tables = [permute(db, dataset) for dataset in ('reddit_5K', 'protein_data')]
    table = pd.concat([df_format(raw) for raw in raw_tables], axis=1)

    # 'a.b' style column names become ('a', 'b') multi-level headers.
    split_names = [tuple(name.split('.')) for name in table.columns]
    table.columns = pd.MultiIndex.from_tuples(split_names)

    print(table)
    return table.to_latex() if latex_f else None
def examine_homology(db, latex_f = False, stoa = None):
    """
    Summarise, per graph, the best result for each ``epd`` setting.

    Runs are selected with the module-level ``PERMUTE``, ``FLIP``,
    ``ID_THRESHOLD`` and ``FEAT`` settings, excluding runs whose ``fil`` is
    "random". The maximum ``result`` of every (epd, graph) pair is kept and
    pivoted into a table with one row per ``epd`` value and one column per
    graph, which is then passed through ``df_format``.

    :param db: database handle; ``db.runs`` is handed to ``sacred_to_df``
    :param latex_f: when truthy, also print the table as LaTeX with a caption
    :param stoa: optional dict mapping graph name -> reference value; entries
        whose key is not a column of the table are ignored. ``None`` (the
        default) is treated as an empty dict.
    :return: the summary DataFrame. One extra row holding the ``stoa`` values
        is always appended (all-NaN when nothing matches) and the row index is
        reset to 0..n, so the ``epd`` labels are not kept in the index.
    """
    query = f'permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and feat=="{FEAT}" and fil!="random"'
    runs = sacred_to_df(db.runs).query(query)

    # The string 'max' selects the same reduction as the builtin without the
    # deprecation warning newer pandas emits for builtin callables.
    best = runs.groupby(['epd', 'graph'], as_index=False)['result'].agg('max')
    df = df_format(best.pivot(index='epd', columns='graph', values='result'))

    # drop() returns a new frame, so the result must be kept; errors='ignore'
    # avoids a KeyError when one of the graphs is not in the table.
    # NOTE(review): 'dhfr,' carries a trailing comma - possibly meant 'dhfr'; confirm.
    df = df.drop(columns=['dhfr,', 'reddit_12K'], errors='ignore')

    # DataFrame.append was removed in pandas 2.0; build the extra row
    # explicitly and concatenate it instead.
    stoa_values = {k: v for k, v in (stoa or {}).items() if k in df.columns}
    stoa_row = pd.DataFrame([stoa_values], columns=df.columns)
    df = pd.concat([df, stoa_row], ignore_index=True)

    caption = 'Here is the summary of whether to add 1 homolgy for different graphs' + query
    print(caption)
    print(df)
    print()
    if latex_f:
        # Insert the caption right after the first line of the LaTeX output.
        print(df.to_latex(longtable=False).replace('\n', '\n\\caption{' + caption + '}\\\\\n', 1))
    return df
def examine_one_feat(db, feat ='sw', latex_f = False):
    """
    Print the best result of one featurization for every graph and filtration.

    Runs are selected with the module-level ``PERMUTE``, ``FLIP`` and
    ``ID_THRESHOLD`` settings and the requested ``feat``. The maximum
    ``result`` of each (graph, fil) pair is pivoted into a graph-by-filtration
    table, passed through ``df_format`` and printed.

    :param db: database handle; ``db.runs`` is handed to ``sacred_to_df``
    :param feat: value the ``feat`` column must equal
    :param latex_f: when truthy, additionally print the table as LaTeX
    :return: ``None``; the table is only printed
    """
    selection = f'permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and feat=="{feat}"'
    runs = sacred_to_df(db.runs).query(selection)

    # Best score per (graph, filtration), reshaped to rows=graph, cols=fil.
    best = runs.groupby(['graph', 'fil'], as_index=False)['result'].aggregate(max)
    table = df_format(best.pivot(index='graph', columns='fil', values='result'))

    print('Here is the summary for %s featuralization\n'%feat)
    print(table)
    print()
    if latex_f:
        print(table.to_latex())
def _runs_relabelled(runs, query, feat_label):
    """Return the rows of ``runs`` matching ``query`` with ``feat`` set to ``feat_label``."""
    # assign() works on a fresh copy, unlike attribute assignment on a query result.
    return runs.query(query).assign(feat=feat_label)


def examine_one_graph(db, graph='mutag', latex_f = False, n_cv = 1, print_flag = False):
    """
    Summarise the best result of each featurization on one graph.

    Four selections of runs are stacked, all restricted to ``graph``, ``n_cv``
    and the module-level ``FLIP`` / ``ID_THRESHOLD`` settings:

    * runs with ``permute == PERMUTE`` and ``ntda != True`` (all feats),
    * 'sw' runs with ``permute == True``, relabelled 'sw_p',
    * 'pf' runs with ``permute == False`` (label stays 'pf'),
    * 'pervec' runs with ``ntda == True``, relabelled 'filvec'.

    Only the allowed feats are kept. The maximum ``result`` per (feat, fil) is
    pivoted into a feat-by-filtration table, the 'random' filtration column is
    removed if present, the table is passed through ``df_format`` and a 'mean'
    column (row mean, rounded to 2 decimals) is added.

    :param db: database handle; ``db.runs`` is handed to ``sacred_to_df``
    :param graph: value the ``graph`` column must equal
    :param latex_f: when truthy, also print the table as LaTeX with a caption
    :param n_cv: value the ``n_cv`` column must equal
    :param print_flag: when truthy, print the stacked, filtered runs first
    :return: the summary DataFrame
    """
    # Load the runs once; every selection below is a query on this frame.
    runs = sacred_to_df(db.runs)

    query = f'n_cv=={n_cv} and permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and ntda!=True'
    selections = [runs.query(query)]

    # add sw with permutation as True
    query = f'n_cv=={n_cv} and permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="sw" and ntda!=True'
    selections.append(_runs_relabelled(runs, query, 'sw_p'))

    # add pf with permutation as False
    query = f'n_cv=={n_cv} and permute == False and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pf" and ntda!=True'
    selections.append(_runs_relabelled(runs, query, 'pf'))

    # add filvec
    query = f'n_cv=={n_cv} and permute == {PERMUTE} and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pervec" and ntda==True'
    selections.append(_runs_relabelled(runs, query, 'filvec'))

    allowed_feats = ['pervec', 'sw', 'sw_p', 'filvec', 'pf']

    include_permuted_kernels = False  # turned on sometimes
    if include_permuted_kernels:
        query = f'permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="pss"'
        selections.append(_runs_relabelled(runs, query, 'pss_p'))

        query = f'permute == True and flip == {FLIP} and _id >={ID_THRESHOLD} and graph=="{graph}" and feat=="wg"'
        selections.append(_runs_relabelled(runs, query, 'wg_p'))

        # filter out pervector and pss
        allowed_feats = ['pervec', 'sw', 'sw_p', 'pss', 'pss_p', 'wg', 'wg_p', 'pf']

    df = pd.concat(selections)
    df = df[df.feat.isin(allowed_feats)]
    if print_flag:
        print(df.to_string())

    # Best score per (feat, filtration), reshaped to rows=feat, cols=fil.
    best = df.groupby(['feat', 'fil'], as_index=False)['result'].agg('max')
    df = best.pivot(index='feat', columns='fil', values='result')
    df = df.drop(columns=['random'], errors='ignore')
    df = df_format(df)

    df['mean'] = df.mean(axis=1)  # add mean column
    # round() returns a new frame; keep it so the rounding takes effect.
    df = df.round({'mean': 2})

    # The caption carries the last query that was executed above.
    caption = f'Here is the summary of different feats for graph {graph} with n_cv {n_cv}\n' + query
    print(caption)
    print(df)
    print('-'*150)
    if latex_f:
        # Insert the caption right after the first line of the LaTeX output.
        print(df.to_latex(longtable=False).replace('\n', '\n\\caption{' + caption + '}\\\\\n', 1))
    return df