def plotFigure(score_data=None, release=True):
    """Save ROC and precision-recall curve figures, one per dataset.

    For each of the five datasets a two-panel figure is drawn and written
    to ``Output/Supplementary/FigureS14`` ... ``FigureS18``: panel "a"
    holds the ROC curves and panel "b" the precision-recall curves of the
    eight compared predictors.

    Parameters
    ----------
    score_data : object
        Score container. It must provide ``selectDataset``,
        ``getTrueLabels`` and ``getScores``; the ``None`` default is only a
        placeholder and cannot be used.
    release : bool
        When true only the PDF is written; otherwise a 300 dpi TIFF and a
        JPG copy are written as well.
    """
    pl.ion()
    font_size = 13
    rc_settings = (
        ('font.family', "sans-serif"),
        ('font.sans-serif', "Arial"),
        ('font.size', font_size),
        ('font.weight', 'medium'),
        ('figure.dpi', 150),
        ('figure.facecolor', 'white'),
        ('lines.linewidth', 2),
        ('axes.facecolor', 'white'),
        ('patch.edgecolor', 'black'),
    )
    for key, value in rc_settings:
        mpl.rcParams[key] = value

    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA',
        '#BC8DBF', '#F6989D', '#FFF79A', '#998675', '#A4A4A4',
        '#5AFF00', '#29A3A3', '#F53DD6', '#F2800D', '#3399FF',
    ]
    # (score key, legend name, index into color_t), in drawing order.
    curves = (
        ('fathmm_w', "FatHMM-W", 5),
        ('logit_condel_new', "Logit+", 14),
        ('condel_new', "Condel+", 13),
        ('logit_condel_old', "Logit", 12),
        ('condel_old', "Condel", 11),
        ('polyphen2', "PolyPhen-2", 1),
        ('mutationassessor', "MutationAssessor", 8),
        ('sift', "SIFT", 2),
    )

    # load data
    val = utils.Validation()
    datasets = ['humvar', 'exovar', 'varibench_selected',
                'predictSNP_selected', 'swissvar_selected']
    fig_names = ['FigureS14', 'FigureS15', 'FigureS16', 'FigureS17',
                 'FigureS18']

    for dataset, fig_name in zip(datasets, fig_names):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()
        print("\tCreating ROC and ROC-PR curves for dataset: " + dataset
              + " (" + fig_name + ")")
        pl.figure(figsize=(12, 6))

        roc_axes = _draw_curve_panel(121, plot_roc, curves, color_t, val,
                                     labels, score_data)
        roc_axes.set_xlabel("False Positive Rate or (1-Specificity)")
        roc_axes.set_ylabel("True Positive Rate or (Sensitivity)")
        # The chance diagonal is drawn after the legend was built, so it
        # does not get a legend entry.
        roc_axes.plot([0, 1], [0, 1], '--', color='#ACACAC')
        roc_axes.text(-0.1, 1.05, "a", fontsize=14, fontweight='bold',
                      va='top', transform=roc_axes.transAxes)

        pr_axes = _draw_curve_panel(122, plot_roc_pr, curves, color_t, val,
                                    labels, score_data)
        pr_axes.set_xlabel("Recall")
        pr_axes.set_ylabel("Precision")
        pr_axes.text(-0.1, 1.05, "b", fontsize=14, fontweight='bold',
                     va='top', transform=pr_axes.transAxes)

        pl.subplots_adjust(left=0.05, bottom=0.08, right=0.99, top=0.93,
                           wspace=0.11)

        # The PDF is written in both modes; the raster copies only when
        # not building the release figures.
        target = 'Output/Supplementary/' + fig_name
        pl.savefig(os.path.abspath(target + '.pdf'))
        if not release:
            pl.savefig(os.path.abspath(target + '.tiff'), dpi=300)
            pl.savefig(os.path.abspath(target + '.jpg'))
        pl.close()


def _draw_curve_panel(position, draw_curve, curves, palette, val, labels,
                      score_data):
    """Create one unit-square subplot and draw every predictor curve on it.

    ``draw_curve`` is ``plot_roc`` or ``plot_roc_pr``; it is called once per
    entry of ``curves`` and threads the legend list through its return
    value. Returns the Axes of the new panel.
    """
    axes = pl.subplot(position)
    pl.grid(True)
    axes.set_xlim([0, 1])
    axes.set_ylim([0, 1])
    # pl.subplot() already returns the Axes, so it is used directly; the
    # old Artist.get_axes() accessor no longer exists in newer matplotlib.
    for spine in ('top', 'right'):
        axes.spines[spine].set_visible(False)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    legend = []
    for score_key, display_name, color_index in curves:
        legend = draw_curve(
            results=val.getROCStats(labels, score_data.getScores(score_key)),
            name=display_name,
            legend=legend,
            color=palette[color_index])

    # loc must be a keyword: newer matplotlib reads two positional
    # arguments as (handles, labels), not (labels, loc).
    leg = axes.legend(legend, loc='lower right', numpoints=1,
                      prop={'size': 12}, fancybox=True)
    leg.get_frame().set_alpha(0.5)
    return axes
def plotFigure(score_data=None, release=True):
    """Save the three-panel per-category AUC figure (FigureS12).

    One panel is drawn for each of the VariBenchSelected,
    predictSNPSelected and SwissVarSelected datasets. Every panel shows,
    for ten predictors, the ROC AUC obtained on each of the five
    categories ``cat1`` ... ``cat5``.

    Parameters
    ----------
    score_data : object
        Score container. It must provide ``selectDataset``,
        ``loadCategories``, ``getData4Categorie``, ``getTrueLabels`` and
        ``getScores``; the ``None`` default is only a placeholder.
    release : bool
        When true only the PDF is written; otherwise a 300 dpi TIFF and a
        JPG copy are written as well.
    """
    pl.ion()
    font_size = 10
    rc_settings = (
        ('font.family', "sans-serif"),
        ('font.sans-serif', "Arial"),
        ('font.size', font_size),
        ('font.weight', 'medium'),
        ('figure.dpi', 150),
        ('figure.facecolor', 'white'),
        ('lines.linewidth', 2),
        ('axes.facecolor', 'white'),
        ('patch.edgecolor', 'black'),
    )
    for key, value in rc_settings:
        mpl.rcParams[key] = value

    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA',
        '#BC8DBF', '#F6989D', '#FFF79A', '#998675', '#A4A4A4',
    ]
    val = utils.Validation()
    categories = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']

    # (dataset, subplot position, panel letter, title, y limits, y ticks,
    #  draw the legend?)
    panels = (
        ('varibench_selected', 311, "a", "VariBenchSelected",
         (0.53, 0.7), [0.53, 0.6, 0.7], True),
        ('predictSNP_selected', 312, "b", "predictSNPSelected",
         (0.55, 0.8), [0.55, 0.6, 0.7, 0.8], False),
        ('swissvar_selected', 313, "c", "SwissVarSelected",
         (0.55, 0.73), [0.55, 0.6, 0.7, 0.75], False),
    )

    # Compute all AUC tables first, then draw.
    auc_tables = []
    for panel in panels:
        score_data.selectDataset(panel[0])
        score_data.loadCategories()
        auc_tables.append(_category_auc_table(score_data, val, categories))

    pl.figure(figsize=(12, 8))
    for panel, aucs in zip(panels, auc_tables):
        _dataset, position, letter, title, y_limits, y_ticks, legend = panel
        _draw_category_panel(position, aucs, color_t, font_size, letter,
                             title, y_limits, y_ticks, legend)

    pl.subplots_adjust(left=0.06, bottom=0.11, right=0.98, top=0.91,
                       wspace=0.03, hspace=0.5)

    # The PDF is written in both modes; the raster copies only when not
    # building the release figures.
    pl.savefig(os.path.abspath('Output/Supplementary/FigureS12.pdf'))
    if not release:
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS12.tiff'),
                   dpi=300)
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS12.jpg'))
    pl.close()


# Order in which the predictors are scored for every category.
_S12_SCORING_ORDER = (
    'mutationtaster', 'polyphen2', 'mutationassessor', 'CADD', 'sift',
    'fathmm_u', 'fathmm_w', 'gerp++', 'phylop', 'lrt',
)

# (score key, legend label, line/marker format, index into the palette),
# in drawing order; the legend labels follow this order.
_S12_SERIES = (
    ('fathmm_w', 'FatHMM-W', 'o-', 0),
    ('fathmm_u', 'FatHMM-U', 'h-', 1),
    ('mutationtaster', 'MT2', 'x-', 2),
    ('mutationassessor', 'MASS', '<-', 8),
    ('polyphen2', 'PP2', '>-', 3),
    ('CADD', 'CADD', 'D-', 9),
    ('sift', 'SIFT', '1-', 4),
    ('lrt', 'LRT', '2-', 5),
    ('gerp++', 'GERP++', '3-', 6),
    ('phylop', 'phyloP', '4-', 7),
)


def _category_auc_table(score_data, val, categories):
    """Return {score key: array of AUCs, one per category} for the
    currently selected dataset."""
    aucs = {}
    for tool in _S12_SCORING_ORDER:
        aucs[tool] = sp.zeros(len(categories))

    for idx, cat in enumerate(categories):
        # The special category "all" scores the whole dataset instead of a
        # category subset.
        whole_dataset = cat == "all"
        if whole_dataset:
            all_labels = score_data.getTrueLabels()
        for tool in _S12_SCORING_ORDER:
            if whole_dataset:
                labels = all_labels
                scores = score_data.getScores(tool)
            else:
                labels, scores = score_data.getData4Categorie(
                    category=cat, tool_name=tool)
            aucs[tool][idx] = val.getROCStats(labels, scores)['auc']
    return aucs


def _draw_category_panel(position, aucs, palette, font_size, letter, title,
                         y_limits, y_ticks, with_legend):
    """Draw one AUC-per-category line plot into subplot ``position``."""
    axes = pl.subplot(position)
    for tool, _label, line_format, color_index in _S12_SERIES:
        pl.plot(aucs[tool], line_format, color=palette[color_index])

    pl.ylim(y_limits[0], y_limits[1])
    pl.xlim(-.1, 4.1)
    pl.yticks(y_ticks)
    pl.grid(axis='y')
    pl.ylabel("AUC")

    interval_labels = [
        ']0.0,1.0[',
        '[0.1,0.9]',
        '[0.2,0.8]',
        '[0.3,0.7]',
        '[0.4,0.6]',
    ]
    pl.xticks(sp.arange(len(interval_labels)), interval_labels,
              fontsize=font_size, rotation=90)

    if with_legend:
        leg = pl.legend([label for _tool, label, _fmt, _idx in _S12_SERIES],
                        loc='upper right', fancybox=True, ncol=5,
                        prop={'size': 10}, numpoints=1)
        frame = leg.get_frame()
        frame.set_alpha(0.2)
        frame.set_edgecolor("none")

    axes.text(-0.05, 1.02, letter, fontsize=15, fontweight="bold", va="top",
              transform=axes.transAxes)
    pl.title(title)
    remove_border()
def plotFigure(score_data=None, release=True):
    """Save the grouped AUC bar chart comparing all predictors (Figure1).

    For each of the five datasets the ROC AUC of ten predictors is drawn
    as a bar, together with a logistic regression over the FatHMM-W
    weighting features and a protein majority vote. Bars for a
    tool/dataset pair listed in ``biased_map`` are hatched ("///" or
    "/o/o/"). FatHMM-W and its feature regression get the "ooo" hatch on
    the three ``*_selected`` datasets. The majority vote is always drawn
    with the "ooo" hatch.

    Parameters
    ----------
    score_data : object
        Score container. It must provide ``selectDataset``,
        ``getTrueLabels``, ``getScores``, ``getFatHMMFeatures`` and
        ``getUniprotIDs``; the ``None`` default is only a placeholder.
    release : bool
        When true only the 300 dpi TIFF is written; otherwise PDF, TIFF
        and JPG are written.
    """
    pl.ion()
    font_size = 13
    rc('font', **{'family': 'sans-serif', 'sans-serif': ['Arial']})
    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA', '#BC8DBF',
        '#F6989D', '#FFF79A', '#998675', '#A4A4A4', '#5AFF00', '#29A3A3',
    ]
    # Proxy patches that only serve as legend keys for the two hatches.
    hatch = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="///")
    circle = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="ooo")

    # load data
    val = utils.Validation()
    mv_predictor = utils.ProteinMajorityVote()
    lr_predictor = utils.LogisticRegression()
    biased_map = {
        'mutationtaster': ['humvar', 'exovar', 'varibench',
                           'varibench_selected', 'predictSNP_selected',
                           'swissvar_selected'],
        'mutationassessor': ['humvar', 'exovar', 'varibench'],
        'polyphen2': ['humvar', 'exovar', 'varibench'],
        'sift': [''],
        'fathmm_u': [''],
        'fathmm_w': ['humvar', 'exovar', 'varibench'],
        'gerp++': [''],
        'phylop': [''],
        'CADD': [''],
    }
    type2_datasets = ('varibench_selected', 'predictSNP_selected',
                      'swissvar_selected')
    datasets = ['humvar', 'exovar', 'varibench_selected',
                'predictSNP_selected', 'swissvar_selected']
    n_datasets = len(datasets)

    # One value per dataset; a slot stays 0 (an invisible bar) when the
    # value is routed to the sibling array for that dataset.
    mt_biased = np.zeros(n_datasets)
    mt = np.zeros(n_datasets)
    mass_biased = np.zeros(n_datasets)
    mass = np.zeros(n_datasets)
    pp2_biased = np.zeros(n_datasets)
    pp2 = np.zeros(n_datasets)
    fathmmw = np.zeros(n_datasets)
    fathmmw_biased = np.zeros(n_datasets)
    fathmmw_type2_biased = np.zeros(n_datasets)
    sift = np.zeros(n_datasets)
    lrt = np.zeros(n_datasets)
    fathmmu = np.zeros(n_datasets)
    gerp = np.zeros(n_datasets)
    phylop = np.zeros(n_datasets)
    cadd = np.zeros(n_datasets)
    # mv is never filled: the majority vote is always stored in mv_biased.
    # The empty bars are still drawn to provide the legend handle.
    mv = np.zeros(n_datasets)
    mv_biased = np.zeros(n_datasets)
    features = np.zeros(n_datasets)
    features_biased = np.zeros(n_datasets)
    features_type2_biased = np.zeros(n_datasets)

    two_way = (
        ('mutationtaster', mt_biased, mt),
        ('mutationassessor', mass_biased, mass),
        ('polyphen2', pp2_biased, pp2),
    )
    never_biased = (
        ('fathmm_u', fathmmu),
        ('sift', sift),
        ('lrt', lrt),
        ('gerp++', gerp),
        ('phylop', phylop),
        ('CADD', cadd),
    )

    for i, dataset in enumerate(datasets):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()

        print("\tPerforming a Logistic Regression over the weighting "
              "features of FatHMM-W for dataset: " + dataset)
        lr_value = lr_predictor.run(true_labels=labels,
                                    features=score_data.getFatHMMFeatures(),
                                    folds=10)['auc']

        print("\tComputing AUC values for dataset: " + dataset)
        for tool, biased_values, plain_values in two_way:
            target = (biased_values if dataset in biased_map[tool]
                      else plain_values)
            target[i] = _fig1_auc(val, score_data, labels, tool)

        fathmmw_auc = _fig1_auc(val, score_data, labels, 'fathmm_w')
        if dataset in biased_map['fathmm_w']:
            fathmmw_biased[i] = fathmmw_auc
            features_biased[i] = lr_value
        elif dataset in type2_datasets:
            fathmmw_type2_biased[i] = fathmmw_auc
            features_type2_biased[i] = lr_value
        else:
            fathmmw[i] = fathmmw_auc
            features[i] = lr_value

        for tool, values in never_biased:
            values[i] = _fig1_auc(val, score_data, labels, tool)

        print("\tPerforming a Protein Majority Vote for dataset: " + dataset)
        mv_biased[i] = mv_predictor.getMV4Dataset(
            true_labels=labels, proteins=score_data.getUniprotIDs(),
            folds=10)['auc']

    pl.figure(figsize=(15, 5))
    fig1 = pl.subplot(111)
    width = 0.05
    x = np.arange(n_datasets)
    tool_names = np.array([
        'FatHMM-W', 'MutationTaster-2', 'PolyPhen-2', 'MutationAssessor',
        'CADD', 'SIFT', 'LRT', 'FatHMM-U', 'Gerp++', 'phyloP',
        'Features ln(Wn), ln(Wd)', 'Protein Majority Vote',
        'Potentially Type 1 Biased', 'Potentially Type 2 Biased',
    ])

    # pl.subplot() already returns the Axes, so it is used directly; the
    # old Artist.get_axes() accessor no longer exists in newer matplotlib.
    ax = fig1
    for spine in ('top', 'right', 'bottom'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.grid(True)

    # Twelve bar slots per dataset, centred on the tick, with a small gap
    # between neighbouring slots.
    left = x - width / 2.0
    right = x + width / 2.0
    pos_fathmmw = left - 5 * width - 0.1
    pos_mt = left - 4 * width - 0.08
    pos_pp2 = left - 3 * width - 0.06
    pos_mass = left - 2 * width - 0.04
    pos_cadd = left - width - 0.02
    pos_sift = left
    pos_lrt = right + 0.02
    pos_fathmmu = right + width + 0.04
    pos_gerp = right + 2 * width + 0.06
    pos_phylop = right + 3 * width + 0.08
    pos_features = right + 4 * width + 0.1
    pos_mv = right + 5 * width + 0.12

    # Plain bars, in legend order.
    plain_bars = (
        (pos_fathmmw, fathmmw, 5),
        (pos_mt, mt, 0),
        (pos_pp2, pp2, 1),
        (pos_mass, mass, 8),
        (pos_cadd, cadd, 9),
        (pos_sift, sift, 2),
        (pos_lrt, lrt, 3),
        (pos_fathmmu, fathmmu, 4),
        (pos_gerp, gerp, 6),
        (pos_phylop, phylop, 7),
        (pos_features, features, 11),
        (pos_mv, mv, 10),
    )
    handles = [fig1.bar(position, values, width=width,
                        color=color_t[color_index])
               for position, values, color_index in plain_bars]

    legend_face = "#FFFFFF"
    almost_black = '#262626'
    legend = ax.legend(handles + [hatch, circle], tool_names, frameon=True,
                       scatterpoints=1, prop={'size': 11}, ncol=7,
                       loc="upper center", fancybox=True,
                       bbox_to_anchor=(0.5, 1.02))
    rect = legend.get_frame()
    rect.set_alpha(0.5)
    rect.set_facecolor(legend_face)
    rect.set_linewidth(0.0)
    # Change the legend label colors to almost black, too
    for text in legend.texts:
        text.set_color(almost_black)

    # Hatched bars, drawn after the legend so they get no entries of
    # their own.
    hatched_bars = (
        (pos_fathmmw, fathmmw_biased, 5, "/o/o/"),
        (pos_fathmmw, fathmmw_type2_biased, 5, "ooo"),
        (pos_mt, mt_biased, 0, "///"),
        (pos_pp2, pp2_biased, 1, "///"),
        (pos_mass, mass_biased, 8, "///"),
        (pos_features, features_biased, 11, "/o/o/"),
        (pos_features, features_type2_biased, 11, "ooo"),
        (pos_mv, mv_biased, 10, "ooo"),
    )
    for position, values, color_index, pattern in hatched_bars:
        fig1.bar(position, values, width=width, color=color_t[color_index],
                 hatch=pattern)

    pl.xticks(x, ['HumVar', 'ExoVar', 'VariBenchSelected',
                  'predictSNPSelected', 'SwissVarSelected'],
              fontsize=font_size)
    fig1.set_ylabel("AUC")
    fig1.set_ylim(0.5, 1.06)
    fig1.set_xlim(-0.5, n_datasets - 0.5)
    pl.yticks([0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
               1.0])
    pl.subplots_adjust(left=0.06, bottom=0.07, right=0.99, top=0.99,
                       wspace=0.05)

    # The TIFF is written in both modes; PDF and JPG only when not
    # building the release figures.
    if not release:
        pl.savefig(os.path.abspath('Output/Figures/Figure1.pdf'))
    pl.savefig(os.path.abspath('Output/Figures/Figure1.tiff'), dpi=300)
    if not release:
        pl.savefig(os.path.abspath('Output/Figures/Figure1.jpg'))
    pl.close()


def _fig1_auc(val, score_data, labels, tool):
    """Return the ROC AUC of ``tool`` on the currently selected dataset."""
    return val.getROCStats(labels, score_data.getScores(tool))['auc']
def plotFigure(score_data=None, release=True):
    """Save the grouped AUC bar chart of the ensemble predictors (Figure4).

    For each of the five datasets the ROC AUC of FatHMM-W, PolyPhen-2,
    MutationAssessor, SIFT, Condel, Logit, Condel+ and Logit+ is drawn as
    a bar. Bars for a tool/dataset pair listed in ``biased_map`` are
    hatched ("///" or "/o/o/"). FatHMM-W, Condel+ and Logit+ get the
    "ooo" hatch on the three ``*_selected`` datasets.

    Parameters
    ----------
    score_data : object
        Score container. It must provide ``selectDataset``,
        ``getTrueLabels`` and ``getScores``; the ``None`` default is only a
        placeholder.
    release : bool
        When true only the 300 dpi TIFF is written; otherwise PDF, TIFF
        and JPG are written.
    """
    pl.ion()
    font_size = 13
    rc_settings = (
        ('font.family', "sans-serif"),
        ('font.sans-serif', "Arial"),
        ('font.size', font_size),
        ('font.weight', 'medium'),
        ('figure.dpi', 150),
        ('figure.facecolor', 'white'),
        ('lines.linewidth', 2),
        ('axes.facecolor', 'white'),
        ('patch.edgecolor', 'black'),
    )
    for key, value in rc_settings:
        mpl.rcParams[key] = value

    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA',
        '#BC8DBF', '#F6989D', '#FFF79A', '#998675', '#A4A4A4',
        '#5AFF00', '#29A3A3', '#F53DD6', '#F2800D', '#3399FF',
    ]
    # Proxy patches that only serve as legend keys for the two hatches.
    hatch = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="///")
    circle = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="ooo")

    # load data
    val = utils.Validation()
    biased_map = {
        'mutationtaster': [
            'humvar', 'exovar', 'varibench', 'varibench_selected',
            'swissvar_selected'
        ],
        'mutationassessor': ['humvar', 'exovar', 'varibench'],
        'polyphen2': ['humvar', 'exovar', 'varibench'],
        'logit_condel_old': ['humvar', 'exovar', 'varibench'],
        'logit_condel_new': ['humvar', 'exovar', 'varibench'],
        'condel_old': ['humvar', 'exovar', 'varibench'],
        'condel_new': ['humvar', 'exovar', 'varibench'],
        'sift': [''],
        'fathmm_u': [''],
        'fathmm_w': ['humvar', 'exovar', 'varibench'],
        'gerp++': [''],
        'phylop': [''],
        'CADD': [''],
    }
    type2_datasets = ('varibench_selected', 'predictSNP_selected',
                      'swissvar_selected')
    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]
    n = len(datasets)

    # One value per dataset; a slot stays 0 (an invisible bar) when the
    # value is routed to a sibling array for that dataset.
    mass_biased = np.zeros(n)
    mass = np.zeros(n)
    pp2 = np.zeros(n)
    pp2_biased = np.zeros(n)
    sift = np.zeros(n)
    logit_biased = np.zeros(n)
    logit = np.zeros(n)
    condel_biased = np.zeros(n)
    condel = np.zeros(n)
    fathmmw = np.zeros(n)
    fathmmw_biased = np.zeros(n)
    fathmmw_type2_biased = np.zeros(n)
    logit_p_biased = np.zeros(n)
    logit_p_type2_biased = np.zeros(n)
    logit_p = np.zeros(n)
    condel_p_biased = np.zeros(n)
    condel_p_type2_biased = np.zeros(n)
    condel_p = np.zeros(n)

    # (score key, array used if listed in biased_map, array otherwise)
    two_way = (
        ('mutationassessor', mass_biased, mass),
        ('polyphen2', pp2_biased, pp2),
        ('logit_condel_old', logit_biased, logit),
        ('condel_old', condel_biased, condel),
    )
    # (score key, array used if listed in biased_map, array for the
    #  *_selected datasets, array otherwise)
    three_way = (
        ('fathmm_w', fathmmw_biased, fathmmw_type2_biased, fathmmw),
        ('condel_new', condel_p_biased, condel_p_type2_biased, condel_p),
        ('logit_condel_new', logit_p_biased, logit_p_type2_biased, logit_p),
    )

    for i, dataset in enumerate(datasets):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()
        print("\tComputing AUC values for dataset: " + dataset)

        for tool, biased_values, plain_values in two_way:
            target = (biased_values if dataset in biased_map[tool]
                      else plain_values)
            target[i] = _fig4_auc(val, score_data, labels, tool)

        for tool, type1_values, type2_values, plain_values in three_way:
            if dataset in biased_map[tool]:
                target = type1_values
            elif dataset in type2_datasets:
                target = type2_values
            else:
                target = plain_values
            target[i] = _fig4_auc(val, score_data, labels, tool)

        sift[i] = _fig4_auc(val, score_data, labels, 'sift')

    pl.figure(figsize=(10, 5))
    fig1 = pl.subplot(111)
    width = 0.07
    x = np.arange(n)
    tool_names = np.array([
        'FatHMM-W', 'PolyPhen-2', 'MutationAssessor', 'SIFT', 'Condel',
        'Logit', 'Condel+', 'Logit+', 'Type 1 Biased', 'Type 2 Biased'
    ])

    # pl.subplot() already returns the Axes, so it is used directly; the
    # old Artist.get_axes() accessor no longer exists in newer matplotlib.
    ax = fig1
    for spine in ('top', 'right', 'bottom'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.grid(True)

    # Eight bar slots per dataset around the tick, with a small gap
    # between neighbouring slots.
    left = x - width / 2.0
    right = x + width / 2.0
    pos_fathmmw = left - 3 * width - 0.06
    pos_pp2 = left - 2 * width - 0.04
    pos_mass = left - width - 0.02
    pos_sift = left
    pos_condel = right + 0.02
    pos_logit = right + width + 0.04
    pos_condel_p = right + 2.0 * width + 0.06
    pos_logit_p = right + 3.0 * width + 0.08

    # Plain bars, in legend order.
    plain_bars = (
        (pos_fathmmw, fathmmw, 5),
        (pos_pp2, pp2, 1),
        (pos_mass, mass, 8),
        (pos_sift, sift, 2),
        (pos_condel, condel, 11),
        (pos_logit, logit, 12),
        (pos_condel_p, condel_p, 13),
        (pos_logit_p, logit_p, 14),
    )
    handles = [fig1.bar(position, values, width=width,
                        color=color_t[color_index])
               for position, values, color_index in plain_bars]

    legend_face = "#FFFFFF"
    almost_black = '#262626'
    legend = ax.legend(handles + [hatch, circle], tool_names, frameon=True,
                       scatterpoints=1, prop={'size': font_size}, ncol=5,
                       loc="upper center", fancybox=True,
                       bbox_to_anchor=(0.5, 1.0))
    rect = legend.get_frame()
    rect.set_facecolor(legend_face)
    rect.set_linewidth(0.0)
    for text in legend.texts:
        text.set_color(almost_black)

    # Hatched bars, drawn after the legend so they get no entries of
    # their own.
    hatched_bars = (
        (pos_fathmmw, fathmmw_biased, 5, "/o/o/"),
        (pos_fathmmw, fathmmw_type2_biased, 5, "ooo"),
        (pos_pp2, pp2_biased, 1, "///"),
        (pos_mass, mass_biased, 8, "///"),
        (pos_condel, condel_biased, 11, "///"),
        (pos_logit, logit_biased, 12, "///"),
        (pos_condel_p, condel_p_biased, 13, "/o/o/"),
        (pos_logit_p, logit_p_type2_biased, 14, "ooo"),
        (pos_condel_p, condel_p_type2_biased, 13, "ooo"),
        (pos_logit_p, logit_p_biased, 14, "/o/o/"),
    )
    for position, values, color_index, pattern in hatched_bars:
        fig1.bar(position, values, width=width, color=color_t[color_index],
                 hatch=pattern)

    # "VariBenchSelected" is spelled as in the other figures of this
    # project.
    pl.xticks(x, [
        'HumVar', 'ExoVar', 'VariBenchSelected', 'predictSNPSelected',
        'SwissVarSelected'
    ], fontsize=font_size)
    pl.ylabel("AUC")
    fig1.set_ylim(0.5, 1.03)
    fig1.set_xlim(-0.5, n - 0.5)
    fig1.text(-0.05, 1.02, "a", fontsize=15, fontweight="bold", va="top",
              transform=fig1.transAxes)
    pl.yticks([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0])
    pl.subplots_adjust(left=0.07, bottom=0.07, right=0.99, top=0.97,
                       wspace=0.05)

    # The TIFF is written in both modes; PDF and JPG only when not
    # building the release figures.
    if not release:
        pl.savefig(os.path.abspath('Output/Figures/Figure4.pdf'))
    pl.savefig(os.path.abspath('Output/Figures/Figure4.tiff'), dpi=300)
    if not release:
        pl.savefig(os.path.abspath('Output/Figures/Figure4.jpg'))
    pl.close()


def _fig4_auc(val, score_data, labels, tool):
    """Return the ROC AUC of ``tool`` on the currently selected dataset."""
    return val.getROCStats(labels, score_data.getScores(tool))['auc']
def printTableS1(score_data=None):
    """Write supplementary table S1 as a semicolon-separated file.

    For each benchmark dataset the table holds one row per metric and one
    column per prediction tool. The output path is
    ``Output/Supplementary/tableS1.csv`` (relative to the working directory).

    Args:
        score_data: object providing ``selectDataset(name)``,
            ``getTrueLabels()``, ``getPredictedLabels(tool)`` and
            ``getScores(tool)``. The default of ``None`` is not usable; a
            real object must be supplied.
    """
    # Column order must stay in sync with the header line written below.
    tools = [
        'mutationtaster', 'polyphen2', 'mutationassessor', 'CADD', 'sift',
        'lrt', 'fathmm_u', 'fathmm_w', 'gerp++', 'phylop'
    ]
    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]
    # (row label, which statistics dict to read, key inside that dict)
    rows = [
        ('TP', 'pred', 'TP'),
        ('FP', 'pred', 'FP'),
        ('TN', 'pred', 'TN'),
        ('FN', 'pred', 'FN'),
        ('AUC', 'roc', 'auc'),
        ('AUC-PR', 'roc', 'pr_auc'),
        ('Accuracy', 'pred', 'accuracy'),
        ('F-Score', 'pred', 'f1_score'),
        ('MCC', 'pred', 'mcc'),
        ('Precision/Positive Predictive Value', 'pred', 'precision'),
        ('Recall/Sensitivity', 'pred', 'recall'),
        ('Specificity', 'pred', 'specificity'),
        ('Negative Predictive Value', 'pred', 'npv'),
    ]

    val = utils.Validation()
    # The context manager closes the file even if a metric computation raises.
    with open(os.path.abspath("Output/Supplementary/tableS1.csv"), 'w') as f:
        f.write(";;MT2;PP2;MASS;CADD;SIFT;LRT;FatHMM-U;FatHMM-W;Gerp++;phyloP\n")
        for dataset in datasets:
            score_data.selectDataset(dataset)
            labels = score_data.getTrueLabels()

            # Compute each tool's statistics once per dataset and reuse them
            # for every row.
            # NOTE(review): assumes getPredictionStats/getROCStats return the
            # same result for the same inputs -- confirm against utils.
            stats = {
                'pred': [
                    val.getPredictionStats(
                        labels, score_data.getPredictedLabels(tool))
                    for tool in tools
                ],
                'roc': [
                    val.getROCStats(labels, score_data.getScores(tool))
                    for tool in tools
                ],
            }

            # Only the first row of a dataset carries the dataset name.
            first_col = dataset
            for label, source, key in rows:
                cells = ["%.2f" % tool_stats[key]
                         for tool_stats in stats[source]]
                f.write(first_col + ";" + label + ";" + ";".join(cells) + "\n")
                first_col = ""
def plotFigure(score_data=None, release=True):
    """Plot FatHMM-W AUC per variant category for each dataset (Figure S11).

    For every dataset the AUC of the ``fathmm_w`` scores is computed on the
    full dataset (``all``) and on each category subset, then drawn as one
    bar panel per dataset via ``plotBar``.

    Args:
        score_data: object providing ``selectDataset``, ``loadCategories``,
            ``getTrueLabels``, ``getScores`` and ``getData4Categorie``.
            The default of ``None`` is not usable.
        release: if true only the PDF is written; otherwise TIFF and JPG
            copies are written as well.
    """
    pl.ion()
    font_size = 10
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['figure.dpi'] = 150
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 0.8
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'black'

    val = utils.Validation()
    categories = ['all', 'pure', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5']
    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]

    # One AUC vector per dataset, indexed like `categories`.
    auc = dict((name, sp.zeros(len(categories))) for name in datasets)

    for dataset in datasets:
        score_data.selectDataset(dataset)
        score_data.loadCategories()
        for i, cat in enumerate(categories):
            if cat == "all":
                # The full dataset has no category filter.
                labels = score_data.getTrueLabels()
                scores = score_data.getScores('fathmm_w')
            else:
                labels, scores = score_data.getData4Categorie(
                    category=cat, tool_name='fathmm_w')
            auc[dataset][i] = val.getROCStats(labels, scores)['auc']

    pl.figure(figsize=(10, 7))
    # NOTE(review): the second positional argument skips the value 2 and the
    # third looks like a subplot spec -- confirm both against plotBar.
    plotBar(auc['humvar'], 0, 321, "HumVar", flag=False)
    # The legend below is attached to the ExoVar panel's axes.
    ax = plotBar(auc['exovar'], 1, 322, "ExoVar", flag=True)
    plotBar(auc['varibench_selected'], 3, 323, "VariBenchSelected", flag=False)
    plotBar(auc['predictSNP_selected'], 4, 324, "predictSNPSelected",
            flag=True)
    plotBar(auc['swissvar_selected'], 5, 325, "SwissVarSelected", flag=False)

    # `hatch` and `circle` are legend handles defined outside this function.
    rect = pl.Rectangle((0, 0), 1, 1, fill=None)
    leg = ax.legend([rect, hatch, circle],
                    ['FatHMM-W', 'Type 1 Biased', 'Type 2 Biased'],
                    loc='upper center',
                    bbox_to_anchor=(0.0, 1.15),
                    fancybox=True,
                    ncol=5,
                    prop={'size': 10},
                    numpoints=1)
    leg.get_frame().set_alpha(0.2)
    leg.get_frame().set_edgecolor("none")
    pl.subplots_adjust(left=0.065, bottom=0.11, right=0.99, top=0.94,
                       wspace=0.03, hspace=0.5)

    # The PDF is written in both modes; the raster copies only outside
    # release mode.
    pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.pdf'))
    if not release:
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.tiff'),
                   dpi=300)
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.jpg'))
    pl.close()
def printTableS2(score_data=None):
    """Write supplementary table S2 as a semicolon-separated file.

    For each dataset the table compares FatHMM-W with two baselines run
    with ``folds=10``: a logistic regression over the FatHMM features and
    a protein majority vote. Baseline cells are written as
    ``value (std)``. The output path is
    ``Output/Supplementary/tableS2.csv`` (relative to the working directory).

    Args:
        score_data: object providing ``selectDataset``, ``getTrueLabels``,
            ``getFatHMMFeatures``, ``getUniprotIDs``, ``getScores`` and
            ``getPredictedLabels``. The default of ``None`` is not usable.
    """
    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]
    # (row label, FatHMM-W stats source, FatHMM-W key, baseline key).
    # The baseline results store the spread under "<baseline key>_std".
    # The ROC stats use 'pr_auc' while the baselines use 'auc_pr'.
    rows = [
        ('AUC', 'roc', 'auc', 'auc'),
        ('AUC-PR', 'roc', 'pr_auc', 'auc_pr'),
        ('Accuracy', 'pred', 'accuracy', 'accuracy'),
        ('F1-Score', 'pred', 'f1_score', 'f1_score'),
        ('MCC', 'pred', 'mcc', 'mcc'),
        ('Precision', 'pred', 'precision', 'precision'),
        ('Recall', 'pred', 'recall', 'recall'),
        ('Negative Predictive Value', 'pred', 'npv', 'npv'),
        ('Specificity', 'pred', 'specificity', 'specificity'),
    ]

    val = utils.Validation()
    mv_predictor = utils.ProteinMajorityVote()
    lr_predictor = utils.LogisticRegression()

    # The context manager closes the file even if a predictor raises.
    with open(os.path.abspath("Output/Supplementary/tableS2.csv"), 'w') as f:
        f.write(";;FatHMM-W;Logistic Regression over the features ln(Wn) & ln(Wd);Protein Majority Vote (MV)\n")
        for dataset in datasets:
            score_data.selectDataset(dataset)
            labels = score_data.getTrueLabels()

            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("\tTraining Logistic Regression on Features ln(Wn) and ln(Wd)")
            lr = lr_predictor.run(true_labels=labels,
                                  features=score_data.getFatHMMFeatures(),
                                  folds=10)
            print("\tPerforming a Protein Majority Vote for dataset: " + dataset)
            mv = mv_predictor.getMV4Dataset(true_labels=labels,
                                            proteins=score_data.getUniprotIDs(),
                                            folds=10)

            # FatHMM-W statistics are computed once and reused for all rows.
            # NOTE(review): assumes getROCStats/getPredictionStats return the
            # same result for the same inputs -- confirm against utils.
            fathmm = {
                'roc': val.getROCStats(
                    labels, score_data.getScores('fathmm_w')),
                'pred': val.getPredictionStats(
                    labels, score_data.getPredictedLabels('fathmm_w')),
            }

            # Only the first row of a dataset carries the dataset name.
            first_col = dataset
            for label, source, fathmm_key, cv_key in rows:
                std_key = cv_key + "_std"
                cells = [
                    "%.2f" % fathmm[source][fathmm_key],
                    "%.2f (%.2f)" % (lr[cv_key], lr[std_key]),
                    "%.2f (%.2f)" % (mv[cv_key], mv[std_key]),
                ]
                f.write(first_col + ";" + label + ";" + ";".join(cells) + "\n")
                first_col = ""
datas = utils.get_datas(PREPROCESSING_TRAIN_DATA_DIR) datas_y = preprocessing_to_train.select_data_y(datas) train_x, train_y, val_x, val_y = preprocessing_to_train.classification_data( data_x=datas, data_y=datas_y) with tf.Session() as sess: inputs, labels, keep_prob = utils.build_input() predictions, cost, optimizer, initial_state, final_state, cell = LSTM.LSTM_cell( LSTM_SIZE, keep_prob, LSTM_LAYERS, BATCH_SIZE, train_x, labels, LEARNING_RATE) accuracy = utils.Validation(predictions, labels) utils.draw_scalar(cost, 'loss') utils.draw_scalar(accuracy, 'Batch accurcy') merged = tf.summary.merge_all() writer = tf.summary.FileWriter('logs/', sess.graph) sess.run(tf.global_variables_initializer()) with graph.as_default(): saver = tf.train.Saver() iteration = 1