def main():
    """Plot one averaged ROC curve per target, coloured by category.

    Target ids and categories are taken from columns 0 and 1 of the CSV at
    ``args.target_ids``. For every target, each immediate sub-directory of
    ``args.result_dir`` is checked for a ``<sub_dir>/<target id>`` directory.
    Every ``.csv`` file found there is converted to a ROC curve with
    ``Scoring.CalcROC`` and interpolated onto a shared 100-point
    false-positive-rate grid. The mean of those interpolated curves is drawn
    with ``plot_curve`` and the figure is saved as
    ``<args.output_dir>/avg_roc.png``.

    Score files are opened through their full path. The working directory is
    deliberately left untouched: changing it would make a relative
    ``args.result_dir`` / ``args.output_dir`` resolve against the wrong
    location for every subsequent target and for the final ``savefig``.

    Targets whose category is not one of ``easy``, ``moderate``, ``hard`` or
    ``unfeasible`` are averaged but not drawn. Targets with no score files
    are reported on stdout.
    """
    args = parse_arguments()

    # Parse the target list once: column 0 is the id, column 1 the category.
    targets = [(row[0], row[1]) for row in read_csv(args.target_ids)]

    category_colors = {
        'easy': 'blue',
        'moderate': 'orange',
        'hard': 'green',
        'unfeasible': 'magenta',
    }

    # Loop-invariant inputs: the immediate sub-directories of the result
    # directory and the common FPR grid every curve is interpolated onto.
    sub_dir_list = next(os.walk(args.result_dir))[1]
    mean_fpr = np.linspace(0, 1, 100)

    plt.figure(figsize=(5, 5))
    for unp_id, category in targets:
        print(unp_id)
        tprs = []

        for sub_dir in sub_dir_list:
            scores_dir = os.path.join(args.result_dir, sub_dir, str(unp_id))
            print(scores_dir)
            if not os.path.isdir(scores_dir):
                continue

            for filename in os.listdir(scores_dir):
                if not filename.endswith('.csv'):
                    continue
                print(filename)
                rows = read_csv(os.path.join(scores_dir, filename))
                # Keep column 0 as read; column 1 is converted to int and is
                # the column index handed to CalcROC below.
                scores = [[row[0], int(row[1])] for row in rows]
                fpr, tpr = Scoring.CalcROC(scores, 1)
                tprs.append(interp(mean_fpr, fpr, np.array(tpr)))

        if not tprs:
            print("no info for target {}".format(unp_id))
            continue

        mean_tpr = np.mean(tprs, axis=0)
        # Pin the end of the averaged curve to TPR = 1.0.
        mean_tpr[-1] = 1.0

        color = category_colors.get(category)
        if color is not None:
            plot_curve(fpr=mean_fpr, tpr=mean_tpr, color=color)

    plt.savefig(os.path.join(args.output_dir, 'avg_roc.png'))
def main():
    """Plot one ROC curve per score file, coloured by target category.

    Target ids and categories are taken from columns 0 and 1 of the CSV at
    ``args.target_ids``. For each target, every ``.csv`` file inside
    ``<args.result_dir>/<target id>`` is converted to a ROC curve with
    ``Scoring.CalcROC`` and drawn with ``plot_curve``. ``args.label`` is
    written onto the axes and the figure is saved as
    ``<args.output_dir>/mlt_roc.png``.

    Score files are opened through their full path. The working directory is
    deliberately left untouched: changing it would make a relative
    ``args.result_dir`` / ``args.output_dir`` resolve against the wrong
    location for later targets and for the final ``savefig``.

    Curves for targets whose category is not one of ``easy``, ``moderate``,
    ``hard`` or ``unfeasible`` are computed but not drawn.
    """
    args = parse_arguments()

    # Parse the target list once: column 0 is the id, column 1 the category.
    targets = [(row[0], row[1]) for row in read_csv(args.target_ids)]

    category_colors = {
        'easy': 'blue',
        'moderate': 'orange',
        'hard': 'green',
        'unfeasible': 'magenta',
    }

    plt.figure(figsize=(5, 5))
    for unp_id, category in targets:
        scores_dir = os.path.join(args.result_dir, str(unp_id))
        if not os.path.isdir(scores_dir):
            continue

        for filename in os.listdir(scores_dir):
            if not filename.endswith('.csv'):
                continue
            print(filename)
            rows = read_csv(os.path.join(scores_dir, filename))
            # Keep column 0 as read; column 1 is converted to int and is the
            # column index handed to CalcROC below.
            scores = [[row[0], int(row[1])] for row in rows]
            print(scores)

            fpr, tpr = Scoring.CalcROC(scores, 1)
            tpr = np.array(tpr)
            print(unp_id)

            color = category_colors.get(category)
            if color is not None:
                plot_curve(fpr=fpr, tpr=tpr, color=color)

    plt.text(0.57, 0.05, args.label)
    plt.savefig(os.path.join(args.output_dir, 'mlt_roc.png'))
def main(scores_csv='/home/amukhopadhyay/ligand_screener_testing/screening_scores.csv',
         output_png='test_roc.png'):
    """Print ranking statistics for one score file and save its ROC plot.

    Args:
        scores_csv: Path of the CSV to evaluate. Column 0 of each row is kept
            as read and column 1 is converted to ``int``. Defaults to the
            path that was previously hard-coded.
        output_png: Destination of the ROC figure. Defaults to the previously
            hard-coded ``test_roc.png``.

    Printed to stdout, one value per line and rounded to one decimal:
    ``EF`` at 0.01, 0.02, 0.05 and 0.1, then ``AUC``, ``BEDROC(alpha=20)``
    and ``RIE(alpha=20)`` from ``StatisticalDescriptors.RankStatistics``.
    """
    scores = [[row[0], int(row[1])] for row in read_csv(scores_csv)]

    # NOTE: the rdkit Scoring module offers counterparts for these numbers
    # (CalcAUC, CalcBEDROC, CalcEnrichment, CalcRIE); this script reports the
    # RankStatistics values and uses Scoring only for the ROC curve itself.
    rank_stats = StatisticalDescriptors.RankStatistics(
        scores, activity_column=operator.itemgetter(1))

    for fraction in (0.01, 0.02, 0.05, 0.1):
        print(round(rank_stats.EF(fraction), 1))
    print(round(rank_stats.AUC(), 1))
    print(round(rank_stats.BEDROC(alpha=20), 1))
    print(round(rank_stats.RIE(alpha=20), 1))

    fpr, tpr = Scoring.CalcROC(scores, 1)
    roc_auc = metrics.auc(fpr, tpr)

    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    # Dashed red diagonal from (0, 0) to (1, 1) for visual reference.
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.savefig(output_png)