Beispiel #1
0
def create_roc_plots(pwmfile, fgfa, background, outdir):
    """Make ROC plots for all motifs.

    One ``get_roc_values`` job is submitted to a worker pool for every
    (motif, background) combination.  Each job result is unpacked as
    ``(error, x, y)`` and drawn with ``roc_plot`` into
    ``<outdir>/images/<motif id>_roc.<background name>.png``.

    Parameters
    ----------
    pwmfile
        Motif file, read with ``read_motifs(..., fmt="pwm", as_dict=True)``.
    fgfa
        Foreground input, passed unchanged to ``get_roc_values``.
    background : dict
        Maps a background name to the background input that is passed to
        ``get_roc_values``; the name also ends up in the image file name.
    outdir : str
        Output directory.  The ``images`` subdirectory is created when it
        does not exist yet.

    If a job reports an error, the error and the motif are logged and the
    process exits with status 1.
    """
    motifs = read_motifs(pwmfile, fmt="pwm", as_dict=True)
    ncpus = int(MotifConfig().get_default_params()['ncpus'])

    # Prepare the output location before any worker is started, so that a
    # failure here cannot leave a running pool behind.
    imgdir = os.path.join(outdir, "images")
    if not os.path.exists(imgdir):
        os.makedirs(imgdir)
    roc_img_file = os.path.join(imgdir, "{}_roc.{}.png")

    pool = Pool(processes=ncpus)
    try:
        jobs = {}
        for bg, fname in background.items():
            for motif in motifs.values():
                key = "{}_{}".format(str(motif), bg)
                jobs[key] = pool.apply_async(
                    get_roc_values,
                    (motif, fgfa, fname),
                )
        # Everything has been submitted; no further tasks will be added.
        pool.close()

        for motif in motifs.values():
            for bg in background:
                key = "{}_{}".format(str(motif), bg)
                error, x, y = jobs[key].get()
                if error:
                    logger.error("Error in thread: %s", error)
                    logger.error("Motif: %s", motif)
                    sys.exit(1)
                roc_plot(roc_img_file.format(motif.id, bg), x, y)
    finally:
        # Always release the workers, also on the sys.exit() path above,
        # where unfinished jobs are no longer needed.
        # NOTE(review): assumes Pool offers the multiprocessing.Pool
        # close()/terminate()/join() interface - confirm against the import.
        pool.terminate()
        pool.join()
Beispiel #2
0
def create_roc_plots(pfmfile, fgfa, background, outdir, genome):
    """Make ROC plots for all motifs.

    One ``get_roc_values`` job is submitted to a worker pool for every
    (motif, background) combination.  Each job result is unpacked as
    ``(error, x, y)`` and drawn with ``roc_plot`` into
    ``<outdir>/images/<motif id>_roc.<background name>.png``.

    Parameters
    ----------
    pfmfile
        Motif file, read with ``read_motifs(..., fmt="pwm", as_dict=True)``.
    fgfa
        Foreground input, passed unchanged to ``get_roc_values``.
    background : dict
        Maps a background name to the background input that is passed to
        ``get_roc_values``; the name also ends up in the image file name.
    outdir : str
        Output directory.  The ``images`` subdirectory is created when it
        does not exist yet.
    genome
        Passed unchanged to ``get_roc_values`` as its fourth argument.

    If a job reports an error, the error and the motif are logged and the
    process exits with status 1.
    """
    motifs = read_motifs(pfmfile, fmt="pwm", as_dict=True)
    ncpus = int(MotifConfig().get_default_params()["ncpus"])

    # Prepare the output location before any worker is started, so that a
    # failure here cannot leave a running pool behind.
    imgdir = os.path.join(outdir, "images")
    if not os.path.exists(imgdir):
        os.makedirs(imgdir)
    roc_img_file = os.path.join(imgdir, "{}_roc.{}.png")

    pool = Pool(processes=ncpus)
    try:
        jobs = {}
        for bg, fname in background.items():
            for motif in motifs.values():
                key = "{}_{}".format(str(motif), bg)
                jobs[key] = pool.apply_async(
                    get_roc_values,
                    (motif, fgfa, fname, genome),
                )
        # Everything has been submitted; no further tasks will be added.
        pool.close()

        for motif in motifs.values():
            for bg in background:
                key = "{}_{}".format(str(motif), bg)
                error, x, y = jobs[key].get()
                if error:
                    logger.error("Error in thread: %s", error)
                    logger.error("Motif: %s", motif)
                    sys.exit(1)
                roc_plot(roc_img_file.format(motif.id, bg), x, y)
    finally:
        # Always release the workers, also on the sys.exit() path above,
        # where unfinished jobs are no longer needed.
        # NOTE(review): assumes Pool offers the multiprocessing.Pool
        # close()/terminate()/join() interface - confirm against the import.
        pool.terminate()
        pool.join()
Beispiel #3
0
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``pwmfile``, ``sample``, ``background``, ``outfile`` and ``ids``
    from ``args``.  Every motif in ``pwmfile`` is scored against the sample
    and the background with ``Scanner.best_score``; for each selected motif
    id one tab-separated row of metrics is printed to standard output.

    ``args.ids`` is an optional comma-separated list of motif ids; when it
    is empty all motifs of the file are reported.  When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot`` (``.png`` is appended
    to the name when missing).
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file deterministically; the result is materialised
    # while the handle is still open because it is iterated several times.
    with open(pwmfile) as handle:
        motifs = list(read_motifs(handle, fmt="pwm"))

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score() yields one sequence of scores per input; the scores are
    # paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics.  print() with a single argument behaves the same
    # on Python 2 and Python 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.\tRecall at 10% FDR")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        x, y = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # Only the enrichment itself is reported, not the second value.
        max_enr = max_enrichment(fg_vals, bg_vals)[0]
        recall = recall_at_fdr(fg_vals, bg_vals, 0.1)
        # MNCP used "%03f" (zero-padded width 3, six decimals); "%0.3f"
        # matches the neighbouring columns.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f\t%0.4f" % (
            motif_id, auc, mncp, enr_fdr, max_enr, recall))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
Beispiel #4
0
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``pwmfile``, ``sample``, ``background``, ``outfile`` and ``ids``
    from ``args``.  Every motif in ``pwmfile`` is scored against the sample
    and the background with ``Scanner.best_score``; for each selected motif
    id one tab-separated row of metrics is printed to standard output.

    ``args.ids`` is an optional comma-separated list of motif ids; when it
    is empty all motifs of the file are reported.  When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot`` (``.png`` is appended
    to the name when missing).
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file deterministically; the result is materialised
    # while the handle is still open because it is iterated several times.
    with open(pwmfile) as handle:
        motifs = list(read_motifs(handle, fmt="pwm"))

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score() yields one sequence of scores per input; the scores are
    # paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics.  print() with a single argument behaves the same
    # on Python 2 and Python 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        x, y = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # Only the enrichment itself is reported, not the second value.
        max_enr = max_enrichment(fg_vals, bg_vals)[0]
        # MNCP used "%03f" (zero-padded width 3, six decimals); "%0.3f"
        # matches the neighbouring columns.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f" % (motif_id, auc, mncp,
                                                  enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
Beispiel #5
0
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``pwmfile``, ``sample``, ``background``, ``outfile`` and ``ids``
    from ``args``.  The selected motifs are run through ``scan`` on the
    sample and on the background; for each selected motif id one
    tab-separated row of metrics is printed to standard output.

    ``args.ids`` is an optional comma-separated list of motif ids; when it
    is empty all motifs of the file are used.  When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot`` (``.png`` is appended
    to the name when missing).
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = dict((x.id, x) for x in pwmfile_to_motifs(pwmfile))

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = list(motifs.keys())

    selected = [motifs[motif_id] for motif_id in ids]

    # scan() results are keyed by motif; only the part of the motif id
    # before the first tab is used as key here.  From every per-sequence
    # match list the second field of the first match is kept.
    # NOTE(review): presumably that field is the match score - confirm
    # against scan().
    fg_total = {}
    result = scan(fg_file, selected, 0.0, 1)
    for key, m in result.items():
        fg_total[key.id.split("\t")[0]] = [
            matches[0][1] for matches in m.values()]

    bg_total = {}
    result = scan(bg_file, selected, 0.0, 1)
    for key, m in result.items():
        bg_total[key.id.split("\t")[0]] = [
            matches[0][1] for matches in m.values()]

    plot_x = []
    plot_y = []
    # Print the metrics.  print() with a single argument behaves the same
    # on Python 2 and Python 3.
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        x, y = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # Only the enrichment itself is reported, not the second value.
        max_enr = max_enrichment(fg_vals, bg_vals)[0]
        # MNCP used "%03f" (zero-padded width 3, six decimals); "%0.3f"
        # matches the neighbouring columns.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f" % (
            motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
Beispiel #6
0
    def create_roc_plots(self, pwm_file, fg_fasta, bg_fasta, name):
        """Create one ROC plot per motif in ``pwm_file``.

        For every motif a ``get_roc_values`` job is submitted through
        ``self.job_server()``.  Each job result is unpacked as
        ``(error, x, y)`` and drawn with ``roc_plot`` into
        ``<self.imgdir>/<motif id>_<name>_roc.png``.

        If a job reports an error it is logged via ``self.logger`` and the
        process exits with status 1.
        """
        # Read all motifs while the file is open, then close it right away.
        with open(pwm_file) as handle:
            motifs = dict((m.id, m) for m in read_motifs(handle, fmt="pwm"))

        jobs = {}
        for motif_id, motif in motifs.items():
            jobs[motif_id] = self.job_server().apply_async(
                get_roc_values, (motif, fg_fasta, bg_fasta,))

        roc_img_file = os.path.join(self.imgdir, "%s_%s_roc.png")

        for motif_id in motifs:
            error, x, y = jobs[motif_id].get()
            if error:
                self.logger.error("Error in thread: %s", error)
                sys.exit(1)

            roc_plot(roc_img_file % (motif_id, name), x, y)
Beispiel #7
0
    def create_roc_plots(self, pwm_file, fg_fasta, bg_fasta, name):
        """Create one ROC plot per motif in ``pwm_file``.

        For every motif a ``get_roc_values`` job is submitted through
        ``self.job_server()``.  Each job result is unpacked as
        ``(error, x, y)`` and drawn with ``roc_plot`` into
        ``<self.imgdir>/<motif id>_<name>_roc.png``.

        If a job reports an error it is logged via ``self.logger`` and the
        process exits with status 1.
        """
        # Read all motifs while the file is open, then close it right away.
        with open(pwm_file) as handle:
            motifs = dict(
                (m.id, m) for m in read_motifs(handle, fmt="pwm"))

        jobs = {}
        for motif_id, motif in motifs.items():
            jobs[motif_id] = self.job_server().apply_async(get_roc_values, (
                motif,
                fg_fasta,
                bg_fasta,
            ))

        roc_img_file = os.path.join(self.imgdir, "%s_%s_roc.png")

        for motif_id in motifs:
            error, x, y = jobs[motif_id].get()
            if error:
                self.logger.error("Error in thread: %s", error)
                sys.exit(1)

            roc_plot(roc_img_file % (motif_id, name), x, y)
Beispiel #8
0
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``pwmfile``, ``sample``, ``background``, ``genome``, ``ids``,
    ``outfile`` and ``outdir`` from ``args``.

    The motifs of ``args.pwmfile`` (restricted to the comma-separated
    ``args.ids`` when given) are evaluated with ``calc_stats``.  One
    tab-separated row per motif is written to
    ``<outdir>/gimme.roc.report.txt`` when ``args.outdir`` is set, and to
    standard output otherwise.  With ``args.outdir`` set, ``html_report``
    is called on the written report as well.  When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot`` (``.png`` is appended
    to the name when missing).
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file deterministically; the result is materialised
    # while the handle is still open because it is iterated twice below.
    with open(args.pwmfile) as handle:
        motifs = list(read_motifs(handle, fmt="pwm"))

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]
    wanted = set(ids)  # constant-time membership test
    motifs = [m for m in motifs if m.id in wanted]

    stats = [
        "phyper_at_fpr",
        "roc_auc",
        "enr_at_fpr",
        "max_enrichment",
        "recall_at_fdr",
        "roc_values",
        "matches_at_fpr",
    ]

    motif_stats = calc_stats(motifs,
                             args.sample,
                             args.background,
                             genome=args.genome,
                             stats=stats)

    plot_x = []
    plot_y = []
    legend = []

    report_path = None
    if args.outdir:
        if not os.path.exists(args.outdir):
            os.makedirs(args.outdir)
        report_path = args.outdir + "/gimme.roc.report.txt"

    f_out = open(report_path, "w") if report_path else sys.stdout
    try:
        # Print the metrics
        f_out.write(
            "Motif\t# matches\t# matches background\tP-value\tlog10 P-value\tROC AUC\tEnr. at 1% FPR\tRecall at 10% FDR\n"
        )
        for motif in motifs:
            # calc_stats() results are keyed by the motif's string form.
            mstats = motif_stats[str(motif)]
            if outputfile:
                x, y = mstats["roc_values"]
                plot_x.append(x)
                plot_y.append(y)
                legend.append(motif.id)
            # A p-value of 0 cannot be log-transformed; report infinity.
            log_pvalue = np.inf
            if mstats["phyper_at_fpr"] > 0:
                log_pvalue = -np.log10(mstats["phyper_at_fpr"])
            f_out.write(
                "{}\t{:d}\t{:d}\t{:.2e}\t{:.3f}\t{:.3f}\t{:.2f}\t{:0.4f}\n".format(
                    motif.id,
                    mstats["matches_at_fpr"][0],
                    mstats["matches_at_fpr"][1],
                    mstats["phyper_at_fpr"],
                    log_pvalue,
                    mstats["roc_auc"],
                    mstats["enr_at_fpr"],
                    mstats["recall_at_fdr"],
                ))
    finally:
        # Only close what was opened here: closing sys.stdout would break
        # every later write to standard output in this process.
        if f_out is sys.stdout:
            f_out.flush()
        else:
            f_out.close()

    if report_path:
        html_report(
            args.outdir,
            report_path,
            args.pwmfile,
            0.01,
        )

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=legend)
Beispiel #9
0
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads ``pwmfile``, ``sample``, ``background``, ``genome``, ``ncpus``,
    ``ids``, ``outfile`` and ``outdir`` from ``args``.

    The motifs of ``args.pwmfile`` (restricted to the comma-separated
    ``args.ids`` when given) are evaluated with ``calc_stats_iterator``,
    which delivers the statistics in several partial results.  One
    tab-separated row per motif is written to
    ``<outdir>/gimme.roc.report.txt`` when ``args.outdir`` is set, and to
    standard output otherwise.  With ``args.outdir`` set, ``html_report``
    is called on the written report as well.  When ``args.outfile`` is
    set, the ROC curves are drawn with ``roc_plot`` (``.png`` is appended
    to the name when missing).
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = read_motifs(args.pwmfile, fmt="pwm")

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]
    wanted = set(ids)  # constant-time membership test
    motifs = [m for m in motifs if m.id in wanted]

    stats = [
        "phyper_at_fpr",
        "roc_auc",
        "pr_auc",
        "enr_at_fpr",
        "recall_at_fdr",
        "roc_values",
        "matches_at_fpr",
    ]

    plot_x = []
    plot_y = []
    legend = []

    report_path = None
    if args.outdir:
        if not os.path.exists(args.outdir):
            os.makedirs(args.outdir)
        report_path = args.outdir + "/gimme.roc.report.txt"

    f_out = open(report_path, "w") if report_path else sys.stdout
    try:
        # Print the metrics
        f_out.write("Motif\t# matches\t# matches background\tP-value\tlog10 P-value\tROC AUC\tPR AUC\tEnr. at 1% FPR\tRecall at 10% FDR\n")

        for motif_stats in calc_stats_iterator(
                motifs, args.sample, args.background,
                genome=args.genome, stats=stats, ncpus=args.ncpus):

            # Each partial result is keyed by the motif's string form;
            # motifs that are not part of this result are skipped.
            for motif in motifs:
                if str(motif) not in motif_stats:
                    continue
                mstats = motif_stats[str(motif)]
                if outputfile:
                    x, y = mstats["roc_values"]
                    plot_x.append(x)
                    plot_y.append(y)
                    legend.append(motif.id)
                # A p-value of 0 cannot be log-transformed; report infinity.
                log_pvalue = np.inf
                if mstats["phyper_at_fpr"] > 0:
                    log_pvalue = -np.log10(mstats["phyper_at_fpr"])
                f_out.write("{}\t{:d}\t{:d}\t{:.2e}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.2f}\t{:0.4f}\n".format(
                    motif.id,
                    mstats["matches_at_fpr"][0],
                    mstats["matches_at_fpr"][1],
                    mstats["phyper_at_fpr"],
                    log_pvalue,
                    mstats["roc_auc"],
                    mstats["pr_auc"],
                    mstats["enr_at_fpr"],
                    mstats["recall_at_fdr"],
                ))
    finally:
        # Only close what was opened here: closing sys.stdout would break
        # every later write to standard output in this process.
        if f_out is sys.stdout:
            f_out.flush()
        else:
            f_out.close()

    if report_path:
        html_report(
            args.outdir,
            report_path,
            args.pwmfile,
            0.01,
        )

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=legend)