Ejemplo n.º 1
0
    def stats(self, fg_fa, bg_fa, logger=None):
        """Calculate enrichment statistics of this motif, foreground vs. background.

        Both inputs are scanned with ``self.pwm_scan_all(..., cutoff=0.0,
        nreport=1, scan_rc=True)``. Element ``[0][1]`` of every per-sequence
        result (presumably the score of the best match -- TODO confirm against
        ``pwm_scan_all``) is collected for both sets and handed to the metric
        functions.

        Args:
            fg_fa: Foreground sequences, passed unchanged to ``pwm_scan_all``.
            bg_fa: Background sequences, passed unchanged to ``pwm_scan_all``.
            logger: Optional object with an ``error(msg)`` method. When it is
                omitted (or falsy) the error message is printed instead.

        Returns:
            dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
            ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
            ``cutoff_fdr``, ``ks`` and ``ks_sig``. ``ks_sig`` is
            ``-log10(ks)``, or the string ``"Inf"`` when ``ks`` is not
            positive.

        Raises:
            Exception: any error raised during the calculation is reported
                (via ``logger`` or stdout) and then re-raised unchanged.
        """
        try:
            fg_result = self.pwm_scan_all(fg_fa,
                                          cutoff=0.0,
                                          nreport=1,
                                          scan_rc=True)
            bg_result = self.pwm_scan_all(bg_fa,
                                          cutoff=0.0,
                                          nreport=1,
                                          scan_rc=True)

            # Sequences without a match are kept in the lists with a
            # placeholder value, so both lists have one entry per sequence.
            no_match = -100
            pos = [m[0][1] if len(m) else no_match
                   for m in fg_result.values()]
            neg = [m[0][1] if len(m) else no_match
                   for m in bg_result.values()]

            stats = {}
            stats["mncp"] = MNCP(pos, neg)
            stats["roc_auc"] = ROC_AUC(pos, neg)
            max_enr, score_at_max_enr = max_enrichment(pos, neg)
            stats["maxenr"] = max_enr
            stats["scoreatmaxenr"] = score_at_max_enr
            stats["fraction"] = fraction_fdr(pos, neg)
            stats["score_fdr"] = score_at_fdr(pos, neg)
            stats["enr_fdr"] = enr_at_fdr(pos, neg)

            # Express the FDR score as a fraction of the score range of the
            # PWM (0 = minimum score, 1 = maximum score).
            min_score = self.pwm_min_score()
            max_score = self.pwm_max_score()
            stats["cutoff_fdr"] = ((stats["score_fdr"] - min_score) /
                                   (max_score - min_score))

            # Element [0][0] of each non-empty foreground result is
            # presumably the match position -- TODO confirm.
            positions = [m[0][0] for m in fg_result.values() if len(m)]
            p = ks_pvalue(positions, max(positions))
            stats["ks"] = p
            if p > 0:
                stats["ks_sig"] = -log(p) / log(10)
            else:
                # log(0) is undefined; callers receive the string marker.
                stats["ks_sig"] = "Inf"

            return stats
        except Exception as e:
            msg = "Error calculating stats of {0}, error {1}".format(
                self.id, str(e))
            if logger:
                logger.error(msg)
            else:
                # Parenthesised single argument: same output on Python 2,
                # valid syntax on Python 3.
                print(msg)
            raise
Ejemplo n.º 2
0
    def stats(self, fg_fa, bg_fa, logger=None):
        """Calculate enrichment statistics of this motif, foreground vs. background.

        Both inputs are scanned with ``self.pwm_scan_all(..., cutoff=0.0,
        nreport=1, scan_rc=True)``. Element ``[0][1]`` of every per-sequence
        result (presumably the score of the best match -- TODO confirm against
        ``pwm_scan_all``) is collected for both sets and handed to the metric
        functions from ``gimmemotifs.rocmetrics``.

        Args:
            fg_fa: Foreground sequences, passed unchanged to ``pwm_scan_all``.
            bg_fa: Background sequences, passed unchanged to ``pwm_scan_all``.
            logger: Optional object with an ``error(msg)`` method. When it is
                omitted (or falsy) the error message is printed instead.

        Returns:
            dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
            ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
            ``cutoff_fdr``, ``ks`` and ``ks_sig``. ``ks_sig`` is
            ``-log10(ks)``, or the string ``"Inf"`` when ``ks`` is not
            positive.

        Raises:
            Exception: any error raised during the calculation is reported
                (via ``logger`` or stdout) and then re-raised unchanged.
        """
        from gimmemotifs.rocmetrics import (MNCP, ROC_AUC, max_enrichment,
                                            fraction_fdr, score_at_fdr,
                                            enr_at_fdr)
        from gimmemotifs.utils import ks_pvalue
        from math import log

        try:
            fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
            bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

            # Sequences without a match are kept in the lists with a
            # placeholder value, so both lists have one entry per sequence.
            no_match = -100
            pos = [m[0][1] if len(m) else no_match
                   for m in fg_result.values()]
            neg = [m[0][1] if len(m) else no_match
                   for m in bg_result.values()]

            stats = {}
            stats["mncp"] = MNCP(pos, neg)
            stats["roc_auc"] = ROC_AUC(pos, neg)
            max_enr, score_at_max_enr = max_enrichment(pos, neg)
            stats["maxenr"] = max_enr
            stats["scoreatmaxenr"] = score_at_max_enr
            stats["fraction"] = fraction_fdr(pos, neg)
            stats["score_fdr"] = score_at_fdr(pos, neg)
            stats["enr_fdr"] = enr_at_fdr(pos, neg)

            # Express the FDR score as a fraction of the score range of the
            # PWM (0 = minimum score, 1 = maximum score).
            min_score = self.pwm_min_score()
            max_score = self.pwm_max_score()
            stats["cutoff_fdr"] = (stats["score_fdr"] - min_score) / (
                max_score - min_score
            )

            # Element [0][0] of each non-empty foreground result is
            # presumably the match position -- TODO confirm.
            positions = [m[0][0] for m in fg_result.values() if len(m)]
            p = ks_pvalue(positions, max(positions))
            stats["ks"] = p
            if p > 0:
                stats["ks_sig"] = -log(p) / log(10)
            else:
                # log(0) is undefined; callers receive the string marker.
                stats["ks_sig"] = "Inf"

            return stats
        except Exception as e:
            # Report first, then re-raise. A bare ``raise`` placed ahead of
            # the reporting code would make the reporting unreachable.
            msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
            if logger:
                logger.error(msg)
            else:
                # Parenthesised single argument: same output on Python 2,
                # valid syntax on Python 3.
                print(msg)
            raise
0
def roc(args):
    """Calculate ROC AUC and other metrics and optionally plot the ROC curve.

    For every selected motif one tab-separated line is printed to stdout with
    ROC AUC, MNCP, enrichment at FDR, maximum enrichment and recall at 10%
    FDR, preceded by a header line.

    Args:
        args: Object with the attributes
            ``pwmfile``    -- motif file; parsed with ``read_motifs`` and also
                              handed to ``Scanner.set_motifs``,
            ``sample``     -- foreground input for ``Scanner.best_score``,
            ``background`` -- background input for ``Scanner.best_score``,
            ``outfile``    -- optional image file name; ``".png"`` is appended
                              when it does not already end with it,
            ``ids``        -- optional comma-separated motif ids; when empty
                              all motifs in ``pwmfile`` are reported.

    Raises:
        KeyError: if an id given in ``args.ids`` is not present in the motif
            file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file once it has been parsed. The motifs are iterated
    # several times below, so they are materialised while the file is open.
    with open(pwmfile) as handle:
        motifs = list(read_motifs(handle, fmt="pwm"))

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score() yields one sequence of scores per input sequence; the
    # scores are paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.\tRecall at 10% FDR")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # The score at which the maximum enrichment occurs is not reported.
        max_enr, _ = max_enrichment(fg_vals, bg_vals)
        recall = recall_at_fdr(fg_vals, bg_vals, 0.1)
        # MNCP uses %0.3f (three decimals) like the AUC column; "%03f" would
        # mean "minimum width 3, zero padded" and print six decimals.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f\t%0.4f" % (
            motif_id, auc, mncp, enr_fdr, max_enr, recall))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
Ejemplo n.º 4
0
def roc(args):
    """Calculate ROC AUC and other metrics and optionally plot the ROC curve.

    For every selected motif one tab-separated line is printed to stdout with
    ROC AUC, MNCP, enrichment at FDR and maximum enrichment, preceded by a
    header line.

    Args:
        args: Object with the attributes
            ``pwmfile``    -- motif file; parsed with ``read_motifs`` and also
                              handed to ``Scanner.set_motifs``,
            ``sample``     -- foreground input for ``Scanner.best_score``,
            ``background`` -- background input for ``Scanner.best_score``,
            ``outfile``    -- optional image file name; ``".png"`` is appended
                              when it does not already end with it,
            ``ids``        -- optional comma-separated motif ids; when empty
                              all motifs in ``pwmfile`` are reported.

    Raises:
        KeyError: if an id given in ``args.ids`` is not present in the motif
            file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the motif file once it has been parsed. The motifs are iterated
    # several times below, so they are materialised while the file is open.
    with open(pwmfile) as handle:
        motifs = list(read_motifs(handle, fmt="pwm"))

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score() yields one sequence of scores per input sequence; the
    # scores are paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # The score at which the maximum enrichment occurs is not reported.
        max_enr, _ = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f (three decimals) like the AUC column; "%03f" would
        # mean "minimum width 3, zero padded" and print six decimals.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f" % (motif_id, auc, mncp, enr_fdr,
                                                  max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
Ejemplo n.º 5
0
def roc(args):
    """Calculate ROC AUC and other metrics and optionally plot the ROC curve.

    For every selected motif one tab-separated line is printed to stdout with
    ROC AUC, MNCP, enrichment at FDR and maximum enrichment, preceded by a
    header line.

    Args:
        args: Object with the attributes
            ``pwmfile``    -- motif file, read with ``pwmfile_to_motifs``,
            ``sample``     -- foreground input for ``scan``,
            ``background`` -- background input for ``scan``,
            ``outfile``    -- optional image file name; ``".png"`` is appended
                              when it does not already end with it,
            ``ids``        -- optional comma-separated motif ids; when empty
                              all motifs in ``pwmfile`` are reported.

    Raises:
        KeyError: if an id given in ``args.ids`` is not present in the motif
            file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(pwmfile))

    if args.ids:
        ids = args.ids.split(",")
    else:
        # A real list (not a dict view), so it can be reused and passed on.
        ids = list(motifs)

    selected = [motifs[motif_id] for motif_id in ids]

    def collect_scores(fasta_file):
        """Scan fasta_file and return {motif id: [element [0][1] per sequence]}."""
        totals = {}
        for motif, seq_matches in scan(fasta_file, selected, 0.0, 1).items():
            # Only the part of the motif id before the first tab is the key.
            key = motif.id.split("\t")[0]
            # matches[0][1] is presumably the score of the first reported
            # match of a sequence -- TODO confirm against scan().
            totals[key] = [matches[0][1] for matches in seq_matches.values()]
        return totals

    fg_total = collect_scores(fg_file)
    bg_total = collect_scores(bg_file)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # The score at which the maximum enrichment occurs is not reported.
        max_enr, _ = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f (three decimals) like the AUC column; "%03f" would
        # mean "minimum width 3, zero padded" and print six decimals.
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f" % (
            motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)