Esempio n. 1
0
def roc(args):
    """Print ROC AUC and related metrics per motif; optionally plot ROC curves.

    Reads the following attributes from ``args``:

    * ``pwmfile``    -- file with the motifs (loaded via pwmfile_to_motifs)
    * ``sample``     -- foreground sequences to scan
    * ``background`` -- background sequences to scan
    * ``outfile``    -- optional image path; ``.png`` is appended when the
                        name does not already end with it
    * ``ids``        -- optional comma-separated motif ids; when empty, all
                        motifs in ``pwmfile`` are used

    A tab-separated table (header plus one row per motif) is written to
    stdout. If ``outfile`` is set, the collected ROC curves are passed to
    roc_plot().

    Raises KeyError if an id in ``args.ids`` is not present in ``pwmfile``.
    """
    outputfile = args.outfile
    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = dict([(m.id, m) for m in pwmfile_to_motifs(args.pwmfile)])

    if args.ids:
        ids = args.ids.split(",")
    else:
        # list() keeps this a real list on Python 3 as well.
        ids = list(motifs.keys())

    # Build the motif list once; both scans use the same selection.
    selected = [motifs[motif_id] for motif_id in ids]
    fg_total = _top_scores_by_motif(args.sample, selected)
    bg_total = _top_scores_by_motif(args.background, selected)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]

        x, y = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)

        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        # max_enrichment() returns a pair; only the first element is reported.
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f like AUC (the previous "%03f" printed six decimals).
        print("%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f"
              % (motif_id, auc, mncp, enr_fdr, max_enr))

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)


def _top_scores_by_motif(seqfile, motif_list):
    """Scan ``seqfile`` and return ``{motif id: [value per scanned entry]}``.

    The value taken is element ``[0][1]`` of each entry returned by scan(),
    presumably the score of the best match per sequence -- TODO confirm
    against scan(). The trailing ``0.0, 1`` arguments are passed through
    unchanged from the original call.

    Keys are the motif id truncated at the first tab character.
    """
    scores = {}
    for motif, hits in scan(seqfile, motif_list, 0.0, 1).items():
        # NOTE(review): an entry with no matches would raise IndexError here.
        scores[motif.id.split("\t")[0]] = [
            matches[0][1] for matches in hits.values()
        ]
    return scores
Esempio n. 2
0
def threshold(args):
    """Print a per-motif score threshold for the requested FDR.

    Reads ``args.fdr``, ``args.pwmfile`` and ``args.inputfile``. Every motif
    from ``pwmfile`` is scanned against ``inputfile``; for each motif the
    ``100 - 100 * fdr`` percentile of the collected scores is printed to
    stdout together with that score rescaled to the motif's
    [pwm_min_score(), pwm_max_score()] range, as a tab-separated table.

    Motifs without any match are reported on stderr and skipped.

    Exits with status 1 (message on stderr) when ``args.fdr`` is not within
    [0, 1].
    """
    # Negated chained comparison so that NaN is rejected too. The message
    # goes to stderr to keep stdout limited to the result table.
    if not 0 <= args.fdr <= 1:
        sys.stderr.write("Please specify a FDR between 0 and 1\n")
        sys.exit(1)

    motifs = pwmfile_to_motifs(args.pwmfile)
    result = scan(args.inputfile, motifs, 0.0, 1)

    print("Motif\tScore\tCutoff")
    for motif, hits in result.items():
        # Element [0][1] of each non-empty entry is used as its score.
        scores = [x[0][1] for x in hits.values() if len(x) > 0]
        if not scores:
            sys.stderr.write("Warning: no matches for {0}\n".format(motif.id))
            continue

        min_score = motif.pwm_min_score()
        opt_score = scoreatpercentile(scores, 100 - (100 * args.fdr))
        # Rescale to the motif's score range.
        # NOTE(review): this divides by zero if max and min score are equal.
        cutoff = (opt_score - min_score) / (motif.pwm_max_score() - min_score)
        print("{0}\t{1}\t{2}".format(motif.id, opt_score, cutoff))