import glob import sys import sklearn as sk rankings_files = glob.glob( '../output_pkis1loto/rankings/bl/ecfp6/ranked_df_16_*.csv') rankings_files = rankings_files + glob.glob( '../output_pkis1loto/rankings/hz/*.csv') rankings_files = rankings_files + glob.glob( '../output_pkis1loto/rankings/cp/*.csv') # alphabetize the file list so we can re-arrange later rankings_files.sort() activity_matrix_file = '../data/pkis1.csv' df_continuous = inf.get_continuous(activity_matrix_file) df_binary = inf.get_binary(df_continuous) s_list_f1 = [] s_list_mcc = [] #load rankings for f in rankings_files: print('{}').format(f) df_rankings = pd.read_csv(f, index_col='molid') df_rankings.index = df_rankings.index.map(str) # set up dictionaries to store F1 and MCC for each target temp_dict_f1 = {} temp_dict_mcc = {} df_rankings.replace('informer', -1000.0, inplace=True) for targ in df_rankings.columns: if df_rankings[targ].count() < 300:
else: print('matrix dataset (arg4) was invalid') exit(1) ranked_sets = [] print('targ, n_inf, inf_sel, ranking, matrix, est_thresh, num_act_inf') for targ in ['bglf4', 'pknb', 'rop18']: for inf_selection in ['c', 'p']: for ranking in ['s', 'l', 'w']: # read in data matrix (PKIS1 or PKIS2) df_act_mat = inf.get_continuous(activity_data_csv) # get binary data matrix (labels real, not inferred) df_binary = inf.get_binary(df_act_mat) # get informers if inf_selection == 'c': inf_molids = inf.inf_sel_clst_medoids(n_informers, fps_file) elif inf_selection == 'p': inf_molids = inf.inf_sel_max_prom_global( n_informers, df_binary) else: print('informer selection method, {}, is invalid.').format( inf_selection) exit(1) # get the thresholds df_thresh = pd.read_csv( '../data/thresholds_2sigma/newtarget_thresholds.csv',