def stats(self, fg_fa, bg_fa, logger=None):
    """Compute enrichment statistics for this motif on two sequence sets.

    Both inputs are scanned with ``self.pwm_scan_all`` (cutoff 0.0, one
    reported match, reverse strand included).  The second field of the
    first match of every entry is used as that entry's score; entries
    without any match get the placeholder score -100.

    Args:
        fg_fa: Foreground input, passed straight to ``pwm_scan_all``.
        bg_fa: Background input, passed straight to ``pwm_scan_all``.
        logger: Optional object with an ``error(msg)`` method.  When it is
            falsy the error message is printed instead.

    Returns:
        dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
        ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
        ``cutoff_fdr``, ``ks`` and ``ks_sig``.  ``ks_sig`` is
        ``-log10(ks)``, or the string ``"Inf"`` when ``ks`` is not positive.

    Raises:
        Exception: Any error raised during the calculation is reported
            (logger or stdout) and then re-raised unchanged.
    """
    try:
        result = {}
        fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
        bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

        # Best score per entry; -100 stands in for "no match at all".
        pos = [m[0][1] if len(m) else -100 for m in fg_result.values()]
        neg = [m[0][1] if len(m) else -100 for m in bg_result.values()]

        result["mncp"] = MNCP(pos, neg)
        result["roc_auc"] = ROC_AUC(pos, neg)
        max_enr, score_at_max = max_enrichment(pos, neg)
        result["maxenr"] = max_enr
        result["scoreatmaxenr"] = score_at_max
        result["fraction"] = fraction_fdr(pos, neg)
        result["score_fdr"] = score_at_fdr(pos, neg)
        result["enr_fdr"] = enr_at_fdr(pos, neg)
        # Express the FDR score relative to the PWM's min..max score range.
        result["cutoff_fdr"] = (result["score_fdr"] - self.pwm_min_score()) / (
            self.pwm_max_score() - self.pwm_min_score()
        )

        # First field of the best foreground match of every matched entry
        # (presumably the match position -- confirm against pwm_scan_all).
        pos = [m[0][0] for m in fg_result.values() if len(m)]
        p = ks_pvalue(pos, max(pos))
        result["ks"] = p
        if p > 0:
            result["ks_sig"] = -log(p) / log(10)
        else:
            # log(0) is undefined; callers receive the string marker instead.
            result["ks_sig"] = "Inf"

        return result
    except Exception as e:
        msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
        if logger:
            logger.error(msg)
        else:
            # Single-argument call form prints identically on Python 2 and 3.
            print(msg)
        raise
def stats(self, fg_fa, bg_fa, logger=None):
    """Compute enrichment statistics for this motif on two sequence sets.

    Both inputs are scanned with ``self.pwm_scan_all`` (cutoff 0.0, one
    reported match, reverse strand included).  The second field of the
    first match of every entry is used as that entry's score; entries
    without any match get the placeholder score -100.

    Args:
        fg_fa: Foreground input, passed straight to ``pwm_scan_all``.
        bg_fa: Background input, passed straight to ``pwm_scan_all``.
        logger: Optional object with an ``error(msg)`` method.  When it is
            falsy the error message is printed instead.

    Returns:
        dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
        ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
        ``cutoff_fdr``, ``ks`` and ``ks_sig``.  ``ks_sig`` is
        ``-log10(ks)``, or the string ``"Inf"`` when ``ks`` is not positive.

    Raises:
        Exception: Any error raised during the calculation is reported
            (logger or stdout) and then re-raised unchanged.
    """
    # Function-scope imports, as in the original; only the names that are
    # actually used are imported.
    from gimmemotifs.rocmetrics import (
        MNCP,
        ROC_AUC,
        max_enrichment,
        fraction_fdr,
        score_at_fdr,
        enr_at_fdr,
    )
    from gimmemotifs.utils import ks_pvalue
    from math import log

    try:
        result = {}
        fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
        bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

        # Best score per entry; -100 stands in for "no match at all".
        pos = [m[0][1] if len(m) else -100 for m in fg_result.values()]
        neg = [m[0][1] if len(m) else -100 for m in bg_result.values()]

        result["mncp"] = MNCP(pos, neg)
        result["roc_auc"] = ROC_AUC(pos, neg)
        max_enr, score_at_max = max_enrichment(pos, neg)
        result["maxenr"] = max_enr
        result["scoreatmaxenr"] = score_at_max
        result["fraction"] = fraction_fdr(pos, neg)
        result["score_fdr"] = score_at_fdr(pos, neg)
        result["enr_fdr"] = enr_at_fdr(pos, neg)
        # Express the FDR score relative to the PWM's min..max score range.
        result["cutoff_fdr"] = (result["score_fdr"] - self.pwm_min_score()) / (
            self.pwm_max_score() - self.pwm_min_score()
        )

        # First field of the best foreground match of every matched entry
        # (presumably the match position -- confirm against pwm_scan_all).
        pos = [m[0][0] for m in fg_result.values() if len(m)]
        p = ks_pvalue(pos, max(pos))
        result["ks"] = p
        if p > 0:
            result["ks_sig"] = -log(p) / log(10)
        else:
            # log(0) is undefined; callers receive the string marker instead.
            result["ks_sig"] = "Inf"

        return result
    except Exception as e:
        # Report first, re-raise last: a leading ``raise`` would make the
        # reporting below unreachable.
        msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
        if logger:
            logger.error(msg)
        else:
            # Single-argument call form prints identically on Python 2 and 3.
            print(msg)
        raise
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads the motifs from ``args.pwmfile``, collects the per-motif scores
    yielded by ``Scanner.best_score`` for ``args.sample`` (foreground) and
    ``args.background``, and prints one tab-separated line of metrics per
    motif to stdout.

    Args:
        args: Object with the attributes ``pwmfile``, ``sample``,
            ``background``, ``outfile`` and ``ids``.  ``ids`` is an optional
            comma-separated string of motif ids; when it is empty all motifs
            in the file are reported.  ``outfile`` is optional; ".png" is
            appended when it does not already end with it.

    Raises:
        KeyError: If an id given in ``args.ids`` is not a motif id from the
            PWM file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile

    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the file once parsed instead of leaking the handle.  ``motifs``
    # is iterated several times below, so it is not a one-shot reader.
    with open(pwmfile) as handle:
        motifs = read_motifs(handle, fmt="pwm")

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score yields one sequence of scores per input entry; the scores
    # are paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.\tRecall at 10% FDR")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        recall = recall_at_fdr(fg_vals, bg_vals, 0.1)
        # MNCP uses %0.3f like the AUC column; the former "%03f" was a typo
        # that printed six decimals.
        print(
            "%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f\t%0.4f"
            % (motif_id, auc, mncp, enr_fdr, max_enr, recall)
        )

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Reads the motifs from ``args.pwmfile``, collects the per-motif scores
    yielded by ``Scanner.best_score`` for ``args.sample`` (foreground) and
    ``args.background``, and prints one tab-separated line of metrics per
    motif to stdout.

    Args:
        args: Object with the attributes ``pwmfile``, ``sample``,
            ``background``, ``outfile`` and ``ids``.  ``ids`` is an optional
            comma-separated string of motif ids; when it is empty all motifs
            in the file are reported.  ``outfile`` is optional; ".png" is
            appended when it does not already end with it.

    Raises:
        KeyError: If an id given in ``args.ids`` is not a motif id from the
            PWM file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile

    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    # Close the file once parsed instead of leaking the handle.  ``motifs``
    # is iterated several times below, so it is not a one-shot reader.
    with open(pwmfile) as handle:
        motifs = read_motifs(handle, fmt="pwm")

    s = Scanner()
    s.set_motifs(pwmfile)

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = [m.id for m in motifs]

    # best_score yields one sequence of scores per input entry; the scores
    # are paired with the motifs by position.
    fg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(fg_file):
        for motif, score in zip(motifs, scores):
            fg_total[motif.id].append(score)

    bg_total = dict((m.id, []) for m in motifs)
    for scores in s.best_score(bg_file):
        for motif, score in zip(motifs, scores):
            bg_total[motif.id].append(score)

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f like the AUC column; the former "%03f" was a typo
        # that printed six decimals.
        print(
            "%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f"
            % (motif_id, auc, mncp, enr_fdr, max_enr)
        )

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)
def roc(args):
    """Calculate ROC_AUC and other metrics and optionally plot ROC curve.

    Loads the motifs from ``args.pwmfile``, runs ``scan`` on ``args.sample``
    (foreground) and ``args.background`` for the selected motifs, and prints
    one tab-separated line of metrics per motif to stdout.

    Args:
        args: Object with the attributes ``pwmfile``, ``sample``,
            ``background``, ``outfile`` and ``ids``.  ``ids`` is an optional
            comma-separated string of motif ids; when it is empty all motifs
            in the file are reported.  ``outfile`` is optional; ".png" is
            appended when it does not already end with it.

    Raises:
        KeyError: If an id given in ``args.ids`` is not a motif id from the
            PWM file.
    """
    pwmfile = args.pwmfile
    fg_file = args.sample
    bg_file = args.background
    outputfile = args.outfile

    # Default extension for image
    if outputfile and not outputfile.endswith(".png"):
        outputfile += ".png"

    motifs = dict([(x.id, x) for x in pwmfile_to_motifs(pwmfile)])

    if args.ids:
        ids = args.ids.split(",")
    else:
        # A real list (not a Python 3 key view) so it can be handed on safely.
        ids = list(motifs)

    selected = [motifs[x] for x in ids]

    # For every motif keep the second field of the first match of each
    # scanned entry (presumably the best score -- confirm against ``scan``).
    # Only the part of the motif id before the first tab is used as the key.
    totals = []
    for fname in (fg_file, bg_file):
        per_motif = {}
        result = scan(fname, selected, 0.0, 1)
        for key, m in result.items():
            per_motif[key.id.split("\t")[0]] = [
                matches[0][1] for matches in m.values()
            ]
        totals.append(per_motif)
    fg_total, bg_total = totals

    plot_x = []
    plot_y = []
    # Print the metrics
    print("Motif\tROC AUC\tMNCP\tEnr. at 5% FDR\tMax enr.")
    for motif_id in ids:
        fg_vals = fg_total[motif_id]
        bg_vals = bg_total[motif_id]
        (x, y) = ROC_values(fg_vals, bg_vals)
        plot_x.append(x)
        plot_y.append(y)
        auc = ROC_AUC(fg_vals, bg_vals)
        mncp = MNCP(fg_vals, bg_vals)
        enr_fdr = enr_at_fdr(fg_vals, bg_vals)
        max_enr, _score = max_enrichment(fg_vals, bg_vals)
        # MNCP uses %0.3f like the AUC column; the former "%03f" was a typo
        # that printed six decimals.
        print(
            "%s\t%0.3f\t%0.3f\t%0.2f\t%0.2f"
            % (motif_id, auc, mncp, enr_fdr, max_enr)
        )

    # Plot the ROC curve
    if outputfile:
        roc_plot(outputfile, plot_x, plot_y, ids=ids)