def create_roc_plots(pwmfile, fgfa, background, outdir):
    """Make ROC plots for all motifs.

    One ``get_roc_values`` job is submitted to a worker pool for every
    (motif, background) combination, and each result is drawn with
    ``roc_plot`` to ``<outdir>/images/<motif.id>_roc.<background>.png``.

    Parameters
    ----------
    pwmfile
        Motif file, read with ``read_motifs(..., fmt="pwm", as_dict=True)``.
    fgfa
        Passed unchanged to ``get_roc_values`` (presumably the foreground
        FASTA file -- confirm against that function).
    background : dict
        Maps a background name (used in the image file name) to the value
        handed to ``get_roc_values`` for that background.
    outdir
        Output directory; an ``images`` subdirectory is created inside it
        when it does not exist yet.

    Notes
    -----
    When a job reports an error, it is logged together with the motif and
    the process exits with status 1.
    """
    motifs = read_motifs(pwmfile, fmt="pwm", as_dict=True)
    ncpus = int(MotifConfig().get_default_params()["ncpus"])
    pool = Pool(processes=ncpus)
    try:
        jobs = {}
        for bg, fname in background.items():
            for motif in motifs.values():
                key = "{}_{}".format(str(motif), bg)
                jobs[key] = pool.apply_async(get_roc_values, (motif, fgfa, fname))

        imgdir = os.path.join(outdir, "images")
        if not os.path.exists(imgdir):
            os.mkdir(imgdir)
        roc_img_file = os.path.join(imgdir, "{}_roc.{}.png")

        for motif in motifs.values():
            for bg in background:
                key = "{}_{}".format(str(motif), bg)
                error, x, y = jobs[key].get()
                if error:
                    logger.error("Error in thread: %s", error)
                    logger.error("Motif: %s", motif)
                    sys.exit(1)
                roc_plot(roc_img_file.format(motif.id, bg), x, y)
    except BaseException:
        # Also covers the SystemExit raised above: stop outstanding jobs
        # instead of leaving worker processes running.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # join() is only legal after close()/terminate(); both paths above
        # guarantee that, so the workers are always reaped.
        pool.join()
def create_roc_plots(pfmfile, fgfa, background, outdir, genome):
    """Make ROC plots for all motifs.

    For every (motif, background) combination a ``get_roc_values`` job is
    submitted to a worker pool; each result is then drawn with ``roc_plot``
    to ``<outdir>/images/<motif.id>_roc.<background>.png``.

    Parameters
    ----------
    pfmfile
        Motif file, read with ``read_motifs(..., fmt="pwm", as_dict=True)``.
    fgfa
        Passed unchanged to ``get_roc_values`` (presumably the foreground
        FASTA file -- confirm against that function).
    background : dict
        Maps a background name (used in the image file name) to the value
        handed to ``get_roc_values`` for that background.
    outdir
        Output directory; an ``images`` subdirectory is created inside it
        when it does not exist yet.
    genome
        Passed unchanged to ``get_roc_values`` as its last argument.

    Notes
    -----
    When a job reports an error, it is logged together with the motif and
    the process exits with status 1.
    """
    motifs = read_motifs(pfmfile, fmt="pwm", as_dict=True)
    ncpus = int(MotifConfig().get_default_params()["ncpus"])
    pool = Pool(processes=ncpus)
    try:
        jobs = {}
        for bg, fname in background.items():
            for motif in motifs.values():
                key = "{}_{}".format(str(motif), bg)
                jobs[key] = pool.apply_async(
                    get_roc_values, (motif, fgfa, fname, genome)
                )

        imgdir = os.path.join(outdir, "images")
        if not os.path.exists(imgdir):
            os.mkdir(imgdir)
        roc_img_file = os.path.join(imgdir, "{}_roc.{}.png")

        for motif in motifs.values():
            for bg in background:
                key = "{}_{}".format(str(motif), bg)
                error, x, y = jobs[key].get()
                if error:
                    logger.error("Error in thread: %s", error)
                    logger.error("Motif: %s", motif)
                    sys.exit(1)
                roc_plot(roc_img_file.format(motif.id, bg), x, y)
    except BaseException:
        # Also covers the SystemExit raised above: stop outstanding jobs
        # instead of leaving worker processes running.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # join() is only legal after close()/terminate(); both paths above
        # guarantee that, so the workers are always reaped.
        pool.join()
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``args.pwmfile``, collects the scores yielded by
    ``Scanner.best_score`` for ``args.sample`` and ``args.background``, and
    prints one tab-separated row of metrics per motif to standard output.
    The motifs reported are those listed in the comma-separated ``args.ids``,
    or all motifs when it is empty. When ``args.outfile`` is set, the ROC
    curves are drawn with ``roc_plot``; ``.png`` is appended to the file name
    if it is missing.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file as soon as it has been parsed.
    with open(pwmfile) as handle:
        motifs = read_motifs(handle, fmt="pwm")

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    def _best_scores(fname):
        # Group the scores per motif id; each item yielded by best_score()
        # is paired positionally with the motifs.
        totals = {m.id: [] for m in motifs}
        for scores in s.best_score(fname):
            for motif, score in zip(motifs, scores):
                totals[motif.id].append(score)
        return totals

    fg_total = _best_scores(fg_file)
    bg_total = _best_scores(bg_file)

    plot_x = []
    plot_y = []
    # Print the metrics
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.\tRecall at 10% FDR")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        recall = recall_at_fdr(fg_vals, bg_vals, 0.1)
        # NOTE(review): "%03f" (width 3, default precision) looks like a typo
        # for "%0.3f"; left unchanged so the emitted table stays identical.
        print("%s\t%0.3f\t%03f\t%0.2f\t%0.2f\t%0.4f" % (
            motif_id, auc, mncp, enr_fdr, max_enr, recall))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``args.pwmfile``, collects the scores yielded by
    ``Scanner.best_score`` for ``args.sample`` and ``args.background``, and
    prints one tab-separated row of metrics per motif to standard output.
    The motifs reported are those listed in the comma-separated ``args.ids``,
    or all motifs when it is empty. When ``args.outfile`` is set, the ROC
    curves are drawn with ``roc_plot``; ``.png`` is appended to the file name
    if it is missing.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file as soon as it has been parsed.
    with open(pwmfile) as handle:
        motifs = read_motifs(handle, fmt="pwm")

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    def _best_scores(fname):
        # Group the scores per motif id; each item yielded by best_score()
        # is paired positionally with the motifs.
        totals = {m.id: [] for m in motifs}
        for scores in s.best_score(fname):
            for motif, score in zip(motifs, scores):
                totals[motif.id].append(score)
        return totals

    fg_total = _best_scores(fg_file)
    bg_total = _best_scores(bg_file)

    plot_x = []
    plot_y = []
    # Print the metrics
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        # NOTE(review): "%03f" (width 3, default precision) looks like a typo
        # for "%0.3f"; left unchanged so the emitted table stays identical.
        print("%s\t%0.3f\t%03f\t%0.2f\t%0.2f" % (motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Loads the motifs from ``args.pwmfile`` with ``pwmfile_to_motifs``, runs
    ``scan`` on ``args.sample`` and ``args.background`` for the selected
    motifs, and prints one tab-separated row of metrics per motif to standard
    output. The motifs reported are those listed in the comma-separated
    ``args.ids``, or all motifs when it is empty. When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot``; ``.png`` is appended to
    the file name if it is missing.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = {m.id: m for m in pwmfile_to_motifs(pwmfile)}

    if args.ids:
        ids = args.ids.split(",")
    else:
        # A real list (not a dict view) so it can be handed to roc_plot as
        # the legend on both Python 2 and Python 3.
        ids = list(motifs)

    selected = [motifs[motif_id] for motif_id in ids]

    def _best_scores(fname):
        # Map each motif id (the part of the scan key's id before the first
        # tab) to the [0][1] entry of every match list returned for it.
        totals = {}
        for key, per_seq in scan(fname, selected, 0.0, 1).items():
            totals[key.id.split("\t")[0]] = [
                matches[0][1] for matches in per_seq.values()
            ]
        return totals

    fg_total = _best_scores(fg_file)
    bg_total = _best_scores(bg_file)

    plot_x = []
    plot_y = []
    # Print the metrics
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        # NOTE(review): "%03f" (width 3, default precision) looks like a typo
        # for "%0.3f"; left unchanged so the emitted table stays identical.
        print("%s\t%0.3f\t%03f\t%0.2f\t%0.2f" % (motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def create_roc_plots(self, pwm_file, fg_fasta, bg_fasta, name):
    """Create one ROC plot per motif in ``pwm_file``.

    A ``get_roc_values`` job is submitted through ``self.job_server()`` for
    every motif; each result is drawn with ``roc_plot`` to
    ``<self.imgdir>/<motif id>_<name>_roc.png``. If a job reports an error,
    it is logged and the process exits with status 1.

    ``fg_fasta`` and ``bg_fasta`` are passed unchanged to ``get_roc_values``.
    """
    # Parse inside the ``with`` so the file handle is closed right away.
    with open(pwm_file) as handle:
        motifs = {m.id: m for m in read_motifs(handle, fmt="pwm")}

    jobs = {}
    for motif_id, motif in motifs.items():
        jobs[motif_id] = self.job_server().apply_async(
            get_roc_values, (motif, fg_fasta, bg_fasta)
        )

    roc_img_file = os.path.join(self.imgdir, "%s_%s_roc.png")
    for motif_id in motifs:
        error, x, y = jobs[motif_id].get()
        if error:
            self.logger.error("Error in thread: %s", error)
            sys.exit(1)
        roc_plot(roc_img_file % (motif_id, name), x, y)
def create_roc_plots(self, pwm_file, fg_fasta, bg_fasta, name):
    """Create one ROC plot per motif in ``pwm_file``.

    A ``get_roc_values`` job is submitted through ``self.job_server()`` for
    every motif; each result is drawn with ``roc_plot`` to
    ``<self.imgdir>/<motif id>_<name>_roc.png``. If a job reports an error,
    it is logged and the process exits with status 1.

    ``fg_fasta`` and ``bg_fasta`` are passed unchanged to ``get_roc_values``.
    """
    # Parse inside the ``with`` so the file handle is closed right away.
    with open(pwm_file) as handle:
        motifs = {m.id: m for m in read_motifs(handle, fmt="pwm")}

    jobs = {}
    for motif_id, motif in motifs.items():
        job_args = (motif, fg_fasta, bg_fasta)
        jobs[motif_id] = self.job_server().apply_async(get_roc_values, job_args)

    roc_img_file = os.path.join(self.imgdir, "%s_%s_roc.png")
    for motif_id in motifs:
        error, x, y = jobs[motif_id].get()
        if error:
            self.logger.error("Error in thread: %s", error)
            sys.exit(1)
        roc_plot(roc_img_file % (motif_id, name), x, y)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads the motifs from ``args.pwmfile``, keeps those whose id is listed in
    the comma-separated ``args.ids`` (all motifs when it is empty), computes
    their statistics with ``calc_stats`` and writes one tab-separated row per
    motif.

    The table goes to ``<args.outdir>/gimme.roc.report.txt`` when
    ``args.outdir`` is set (the directory is created if needed and
    ``html_report`` is called afterwards); otherwise it goes to standard
    output. When ``args.outfile`` is set, the ROC curves are drawn with
    ``roc_plot``; ``.png`` is appended to the file name if it is missing.
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file as soon as it has been parsed.
    with open(args.pwmfile) as handle:
        motifs = read_motifs(handle, fmt="pwm")

    if args.ids:
        wanted = set(args.ids.split(","))
    else:
        wanted = {m.id for m in motifs}
    motifs = [m for m in motifs if m.id in wanted]

    stats = [
        "phyper_at_fpr",
        "roc_auc",
        "enr_at_fpr",
        "max_enrichment",
        "recall_at_fdr",
        "roc_values",
        "matches_at_fpr",
    ]

    motif_stats = calc_stats(
        motifs, args.sample, args.background, genome=args.genome, stats=stats
    )

    plot_x = []
    plot_y = []
    legend = []

    report_path = None
    if args.outdir:
        if not os.path.exists(args.outdir):
            os.makedirs(args.outdir)
        report_path = os.path.join(args.outdir, "gimme.roc.report.txt")
        f_out = open(report_path, "w")
    else:
        f_out = sys.stdout

    try:
        # Print the metrics
        f_out.write(
            "Motif\t# matches\t# matches background\tP-value\tlog10 P-value\tROC AUC\tEnr. at 1% FPR\tRecall at 10% FDR\n"
        )
        for motif in motifs:
            row = motif_stats[str(motif)]
            if outputfile:
                x, y = row["roc_values"]
                plot_x.append(x)
                plot_y.append(y)
                legend.append(motif.id)
            # A p-value of 0 has no finite log; report infinity instead.
            log_pvalue = np.inf
            if row["phyper_at_fpr"] > 0:
                log_pvalue = -np.log10(row["phyper_at_fpr"])
            f_out.write(
                "{}\t{:d}\t{:d}\t{:.2e}\t{:.3f}\t{:.3f}\t{:.2f}\t{:0.4f}\n".format(
                    motif.id,
                    row["matches_at_fpr"][0],
                    row["matches_at_fpr"][1],
                    row["phyper_at_fpr"],
                    log_pvalue,
                    row["roc_auc"],
                    row["enr_at_fpr"],
                    row["recall_at_fdr"],
                )
            )
    finally:
        # Only close a file we opened ourselves: closing sys.stdout would
        # break every later write by the caller.
        if f_out is sys.stdout:
            f_out.flush()
        else:
            f_out.close()

    if report_path is not None:
        html_report(args.outdir, report_path, args.pwmfile, 0.01)

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=legend)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads the motifs from ``args.pwmfile``, keeps those whose id is listed in
    the comma-separated ``args.ids`` (all motifs when it is empty), and
    writes one tab-separated row per motif as the results arrive from
    ``calc_stats_iterator``.

    The table goes to ``<args.outdir>/gimme.roc.report.txt`` when
    ``args.outdir`` is set (the directory is created if needed and
    ``html_report`` is called afterwards); otherwise it goes to standard
    output. When ``args.outfile`` is set, the ROC curves are drawn with
    ``roc_plot``; ``.png`` is appended to the file name if it is missing.
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = read_motifs(args.pwmfile, fmt="pwm")

    if args.ids:
        wanted = set(args.ids.split(","))
    else:
        wanted = {m.id for m in motifs}
    motifs = [m for m in motifs if m.id in wanted]

    stats = [
        "phyper_at_fpr",
        "roc_auc",
        "pr_auc",
        "enr_at_fpr",
        "recall_at_fdr",
        "roc_values",
        "matches_at_fpr",
    ]

    plot_x = []
    plot_y = []
    legend = []

    report_path = None
    if args.outdir:
        if not os.path.exists(args.outdir):
            os.makedirs(args.outdir)
        report_path = os.path.join(args.outdir, "gimme.roc.report.txt")
        f_out = open(report_path, "w")
    else:
        f_out = sys.stdout

    try:
        # Print the metrics
        f_out.write("Motif\t# matches\t# matches background\tP-value\tlog10 P-value\tROC AUC\tPR AUC\tEnr. at 1% FPR\tRecall at 10% FDR\n")

        stats_iter = calc_stats_iterator(
            motifs,
            args.sample,
            args.background,
            genome=args.genome,
            stats=stats,
            ncpus=args.ncpus,
        )
        for motif_stats in stats_iter:
            # Each yielded mapping may cover only some of the motifs, so
            # report just the ones present in this batch.
            for motif in motifs:
                if str(motif) not in motif_stats:
                    continue
                row = motif_stats[str(motif)]
                if outputfile:
                    x, y = row["roc_values"]
                    plot_x.append(x)
                    plot_y.append(y)
                    legend.append(motif.id)
                # A p-value of 0 has no finite log; report infinity instead.
                log_pvalue = np.inf
                if row["phyper_at_fpr"] > 0:
                    log_pvalue = -np.log10(row["phyper_at_fpr"])
                f_out.write("{}\t{:d}\t{:d}\t{:.2e}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.2f}\t{:0.4f}\n".format(
                    motif.id,
                    row["matches_at_fpr"][0],
                    row["matches_at_fpr"][1],
                    row["phyper_at_fpr"],
                    log_pvalue,
                    row["roc_auc"],
                    row["pr_auc"],
                    row["enr_at_fpr"],
                    row["recall_at_fdr"],
                ))
    finally:
        # Only close a file we opened ourselves: closing sys.stdout would
        # break every later write by the caller.
        if f_out is sys.stdout:
            f_out.flush()
        else:
            f_out.close()

    if report_path is not None:
        html_report(args.outdir, report_path, args.pwmfile, 0.01)

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=legend)