def generate_figures():
    """Render the ROC, AUC and score-distribution figures.

    Reads ``dev_const.PUB_CONDEL_PREDICTION_RESULT`` as a whitespace-separated
    table of byte strings.  Column 4 holds the class label (``1`` is plotted
    as "pathogenic", ``0`` as "neutral") and columns 5..12 hold one score
    column per predictor, in the order CombiVEP, Phylop, SIFT, PP2, LRT, MT,
    GERP, Condel.

    Three figures are written:

    * ``dev_const.PUB_ROC_FIG`` -- one ROC curve per predictor.
    * ``dev_const.PUB_AUC_FIG`` -- bar chart of the area under each curve.
    * ``dev_const.PUB_SCORES_DISTR_FIG`` -- CombiVEP and Condel score
      histograms for the two classes.

    Returns:
        A 3-tuple ``(PUB_ROC_FIG, PUB_AUC_FIG, PUB_SCORES_DISTR_FIG)`` of the
        output locations taken from ``dev_const``.
    """
    # Parse the result file once; both the label column and the score
    # columns are sliced from the same array.
    raw_data = np.loadtxt(dev_const.PUB_CONDEL_PREDICTION_RESULT,
                          dtype='S20')
    labels = raw_data[:, 4]
    # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
    scores_data = raw_data[:, 5:13].astype(float)

    # 'S20' yields byte strings, so compare against bytes literals; this is
    # identical on Python 2 and is required for a match on Python 3.
    pathogenic_scores = scores_data[labels == b'1']
    neutral_scores = scores_data[labels == b'0']

    min_value = np.amin(scores_data)
    max_value = np.amax(scores_data)

    predictor_names = ('CombiVEP', 'Phylop', 'SIFT', 'PP2',
                       'LRT', 'MT', 'GERP', 'Condel')
    predictor_colors = ('k', 'm', 'c', 'g',
                        'b', 'coral', 'darkred', 'r')

    # Produce ROC data for every predictor over 5001 evenly spaced values
    # spanning the observed score range.
    # NOTE(review): calculate_roc is defined elsewhere; it is indexed below
    # as two 2-D arrays with one column per predictor -- confirm.
    fp_rates, tp_rates = calculate_roc(pathogenic_scores,
                                       neutral_scores,
                                       np.linspace(min_value, max_value, 5001))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i, (name, color) in enumerate(zip(predictor_names, predictor_colors)):
        ax.plot(fp_rates[:, i], tp_rates[:, i], color, label=name)
    ax.set_ylabel('true positive rate')
    ax.set_xlabel('false positive rate')
    ax.legend(bbox_to_anchor=(0.9999, 0.0001), loc=4)
    fig.savefig(dev_const.PUB_ROC_FIG, bbox_inches='tight', pad_inches=0.05)
    # pyplot keeps every figure alive until it is explicitly closed.
    plt.close(fig)

    # Produce AUC data from the ROC data.
    n_predictors = len(predictor_names)
    aucs = [auc(fp_rates[:, i], tp_rates[:, i]) for i in range(n_predictors)]
    ind = [0.5 * (i + 1) - 0.4 for i in range(n_predictors)]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # The tick offset (+0.15, half the bar width) and the text positions
    # below assume ``ind`` is the LEFT edge of each bar, so pin the alignment
    # instead of relying on the matplotlib default.
    ax.bar(ind, aucs, 0.3, color=predictor_colors, align='edge')
    for left_edge, value in zip(ind, aucs):
        ax.text(left_edge, value + 0.01, "%0.3f" % value)
    ax.set_ylim([0.7, 0.9])
    ax.set_xticks(np.array(ind) + 0.15)
    ax.set_xticklabels(predictor_names, rotation=30)
    fig.savefig(dev_const.PUB_AUC_FIG, bbox_inches='tight', pad_inches=0.05)
    plt.close(fig)

    # Plot score distributions: CombiVEP is score column 0, Condel column 7.
    fig = plt.figure()
    _plot_score_distribution(fig.add_subplot(211),
                             pathogenic_scores[:, 0],
                             neutral_scores[:, 0],
                             'CombiVEP score distribution')
    _plot_score_distribution(fig.add_subplot(212),
                             pathogenic_scores[:, 7],
                             neutral_scores[:, 7],
                             'Condel score distribution')
    fig.tight_layout()
    fig.savefig(dev_const.PUB_SCORES_DISTR_FIG,
                bbox_inches='tight',
                pad_inches=0.05)
    plt.close(fig)

    return (dev_const.PUB_ROC_FIG,
            dev_const.PUB_AUC_FIG,
            dev_const.PUB_SCORES_DISTR_FIG,
            )


def _plot_score_distribution(ax, pathogenic_scores, neutral_scores, title):
    """Draw per-class score histograms as dashed lines on ``ax``.

    Args:
        ax: matplotlib axes to draw on.
        pathogenic_scores: 1-D scores of the class labelled ``1``.
        neutral_scores: 1-D scores of the class labelled ``0``.
        title: title text for the subplot.
    """
    # The range is padded slightly beyond [0, 1] and split into 100 bins.
    hist_range = (-0.005, 1.005)
    # NOTE(review): ``hist`` comes from elsewhere in the file; it is unpacked
    # here as (counts, bin_edges) -- confirm.
    patho_hist, bins = hist(pathogenic_scores, bins=100, range=hist_range)
    neutr_hist, bins = hist(neutral_scores, bins=100, range=hist_range)
    # Plot each count at the midpoint of its bin.
    center = (bins[:-1] + bins[1:]) / 2
    ax.plot(center, patho_hist, 'r--', label='pathogenic variants')
    ax.plot(center, neutr_hist, 'b--', label='neutral variants')
    ax.set_title(title)
    ax.set_ylabel('samples')
    ax.set_xlabel('score')
    ax.legend(bbox_to_anchor=(0.999, 0.999), loc=1)