Esempio n. 1
0
import glob
import sys
import sklearn as sk

# Gather every ranking CSV matched by the three glob patterns below.
# The combined list is alphabetized so we can re-arrange it later.
rankings_files = sorted(
    csv_path
    for pattern in (
        '../output_pkis1loto/rankings/bl/ecfp6/ranked_df_16_*.csv',
        '../output_pkis1loto/rankings/hz/*.csv',
        '../output_pkis1loto/rankings/cp/*.csv',
    )
    for csv_path in glob.glob(pattern)
)

# Load the activity matrix and derive its binary form from it.
# NOTE(review): `inf` is not imported in the visible part of this file;
# confirm which module it refers to.
activity_matrix_file = '../data/pkis1.csv'
df_binary = inf.get_binary(inf.get_continuous(activity_matrix_file))

# Accumulators for the F1 and MCC results
# (presumably one entry per rankings file -- TODO confirm).
s_list_f1, s_list_mcc = [], []

#load rankings
for f in rankings_files:
    # Report which rankings file is being processed.  The original
    # `print('{}').format(f)` applied .format() to print()'s return value
    # (None under Python 3) and raised AttributeError; printing the path
    # directly gives the intended output on both Python 2 and 3.
    print(f)
    df_rankings = pd.read_csv(f, index_col='molid')
    # Index the rows by molid as strings, whatever dtype read_csv inferred.
    df_rankings.index = df_rankings.index.map(str)
    # set up dictionaries to store F1 and MCC for each target
    temp_dict_f1 = {}
    temp_dict_mcc = {}
    # Cells holding the literal 'informer' become the numeric value -1000.0
    # (presumably so informer compounds sort below every real score --
    # TODO confirm).
    df_rankings.replace('informer', -1000.0, inplace=True)
    for targ in df_rankings.columns:
        if df_rankings[targ].count() < 300:
else:
    print('matrix dataset (arg4) was invalid')
    exit(1)

# Header line naming the comma-separated fields
# (presumably the loop below prints one matching row per combination --
# TODO confirm).
print('targ, n_inf, inf_sel, ranking, matrix, est_thresh, num_act_inf')

# Starts empty; nothing in the visible code appends to it yet.
ranked_sets = list()
for targ in ['bglf4', 'pknb', 'rop18']:
    for inf_selection in ['c', 'p']:
        for ranking in ['s', 'l', 'w']:

            # read in data matrix (PKIS1 or PKIS2)
            df_act_mat = inf.get_continuous(activity_data_csv)

            # get binary data matrix (labels real, not inferred)
            df_binary = inf.get_binary(df_act_mat)

            # get informers
            # Choose the informer molids with the method named by
            # inf_selection: 'c' calls inf.inf_sel_clst_medoids and
            # 'p' calls inf.inf_sel_max_prom_global.
            if inf_selection == 'c':
                inf_molids = inf.inf_sel_clst_medoids(n_informers, fps_file)
            elif inf_selection == 'p':
                inf_molids = inf.inf_sel_max_prom_global(
                    n_informers, df_binary)
            else:
                # Format the message before printing it.  The original called
                # .format() on print()'s return value, which is None under
                # Python 3, so the script died with AttributeError instead of
                # printing the message and exiting with status 1.
                print('informer selection method, {}, is invalid.'.format(
                    inf_selection))
                exit(1)

            # get the thresholds
            df_thresh = pd.read_csv(
                '../data/thresholds_2sigma/newtarget_thresholds.csv',