def stats(self, fg_fa, bg_fa, logger=None):
    """Compute enrichment and positional statistics for this motif.

    Both inputs are scanned with ``self.pwm_scan_all`` using
    ``cutoff=0.0, nreport=1, scan_rc=True``. For every sequence the first
    reported hit is used; element ``[1]`` of that hit is treated as its
    score and element ``[0]`` as its position (presumably a
    ``(position, score)`` pair -- confirm against ``pwm_scan_all``).

    Args:
        fg_fa: Foreground sequences, passed unchanged to ``pwm_scan_all``.
        bg_fa: Background sequences, passed unchanged to ``pwm_scan_all``.
        logger: Optional object with an ``error(msg)`` method. When it is
            falsy, a failure message is printed instead.

    Returns:
        dict with the keys ``"mncp"``, ``"roc_auc"``, ``"maxenr"``,
        ``"scoreatmaxenr"``, ``"fraction"``, ``"score_fdr"``,
        ``"enr_fdr"``, ``"cutoff_fdr"``, ``"ks"`` and ``"ks_sig"``.
        ``"ks_sig"`` is ``-log10(ks)``, or the string ``"Inf"`` when the
        p-value is not greater than zero.

    Raises:
        Exception: Any error raised during the calculation is reported
            (logger or stdout) and then re-raised unchanged.
    """
    try:
        result = {}
        fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
        bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

        # Sequences without any reported hit get a placeholder score of -100
        # so that they still take part in the ranking-based metrics.
        fg_scores = [hits[0][1] if len(hits) else -100 for hits in fg_result.values()]
        bg_scores = [hits[0][1] if len(hits) else -100 for hits in bg_result.values()]

        result["mncp"] = MNCP(fg_scores, bg_scores)
        result["roc_auc"] = ROC_AUC(fg_scores, bg_scores)
        max_enr, score_at_max_enr = max_enrichment(fg_scores, bg_scores)
        result["maxenr"] = max_enr
        result["scoreatmaxenr"] = score_at_max_enr
        result["fraction"] = fraction_fdr(fg_scores, bg_scores)
        result["score_fdr"] = score_at_fdr(fg_scores, bg_scores)
        result["enr_fdr"] = enr_at_fdr(fg_scores, bg_scores)
        # Rescale the FDR score to the motif's own score range.
        # NOTE(review): divides by zero if pwm_max_score() == pwm_min_score().
        result["cutoff_fdr"] = (result["score_fdr"] - self.pwm_min_score()) / (
            self.pwm_max_score() - self.pwm_min_score())

        # Positional test, only over foreground sequences that have a hit.
        positions = [hits[0][0] for hits in fg_result.values() if len(hits)]
        if positions:
            p = ks_pvalue(positions, max(positions))
        else:
            # max() of an empty list raises ValueError; with no hits there
            # is no positional evidence at all, so report p = 1.0.
            p = 1.0
        result["ks"] = p
        if p > 0:
            result["ks_sig"] = -log(p) / log(10)
        else:
            # Kept as a string: callers may already rely on this marker.
            result["ks_sig"] = "Inf"

        return result
    except Exception as e:
        msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
        if logger:
            logger.error(msg)
        else:
            # Parenthesised form prints the same text under Python 2 and
            # is also valid syntax under Python 3.
            print(msg)
        raise
def stats(self, fg_fa, bg_fa, logger=None):
    """Compute enrichment and positional statistics for this motif.

    Both inputs are scanned with ``self.pwm_scan_all`` using
    ``cutoff=0.0, nreport=1, scan_rc=True``. For every sequence the first
    reported hit is used; element ``[1]`` of that hit is treated as its
    score and element ``[0]`` as its position (presumably a
    ``(position, score)`` pair -- confirm against ``pwm_scan_all``).

    Args:
        fg_fa: Foreground sequences, passed unchanged to ``pwm_scan_all``.
        bg_fa: Background sequences, passed unchanged to ``pwm_scan_all``.
        logger: Optional object with an ``error(msg)`` method. When it is
            falsy, a failure message is printed instead.

    Returns:
        dict with the keys ``"mncp"``, ``"roc_auc"``, ``"maxenr"``,
        ``"scoreatmaxenr"``, ``"fraction"``, ``"score_fdr"``,
        ``"enr_fdr"``, ``"cutoff_fdr"``, ``"ks"`` and ``"ks_sig"``.
        ``"ks_sig"`` is ``-log10(ks)``, or the string ``"Inf"`` when the
        p-value is not greater than zero.

    Raises:
        Exception: Any error raised during the calculation is reported
            (logger or stdout) and then re-raised unchanged.
    """
    from gimmemotifs.rocmetrics import (
        MNCP,
        ROC_AUC,
        max_enrichment,
        fraction_fdr,
        score_at_fdr,
        enr_at_fdr,
    )
    from gimmemotifs.utils import ks_pvalue
    from math import log

    try:
        result = {}
        fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
        bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

        # Sequences without any reported hit get a placeholder score of -100
        # so that they still take part in the ranking-based metrics.
        fg_scores = [hits[0][1] if len(hits) else -100 for hits in fg_result.values()]
        bg_scores = [hits[0][1] if len(hits) else -100 for hits in bg_result.values()]

        result["mncp"] = MNCP(fg_scores, bg_scores)
        result["roc_auc"] = ROC_AUC(fg_scores, bg_scores)
        max_enr, score_at_max_enr = max_enrichment(fg_scores, bg_scores)
        result["maxenr"] = max_enr
        result["scoreatmaxenr"] = score_at_max_enr
        result["fraction"] = fraction_fdr(fg_scores, bg_scores)
        result["score_fdr"] = score_at_fdr(fg_scores, bg_scores)
        result["enr_fdr"] = enr_at_fdr(fg_scores, bg_scores)
        # Rescale the FDR score to the motif's own score range.
        # NOTE(review): divides by zero if pwm_max_score() == pwm_min_score().
        result["cutoff_fdr"] = (result["score_fdr"] - self.pwm_min_score()) / (
            self.pwm_max_score() - self.pwm_min_score()
        )

        # Positional test, only over foreground sequences that have a hit.
        positions = [hits[0][0] for hits in fg_result.values() if len(hits)]
        if positions:
            p = ks_pvalue(positions, max(positions))
        else:
            # max() of an empty list raises ValueError; with no hits there
            # is no positional evidence at all, so report p = 1.0.
            p = 1.0
        result["ks"] = p
        if p > 0:
            result["ks_sig"] = -log(p) / log(10)
        else:
            # Kept as a string: callers may already rely on this marker.
            result["ks_sig"] = "Inf"

        return result
    except Exception as e:
        # Report first, then re-raise. A bare ``raise`` placed before the
        # reporting code would make the message below unreachable.
        msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
        if logger:
            logger.error(msg)
        else:
            # Parenthesised form prints the same text under Python 2 and
            # is also valid syntax under Python 3.
            print(msg)
        raise
def motif_localization(fastafile, motif, width, outfile, cutoff=0.9):
    """Plot where a motif matches within the sequences and test the spread.

    The FASTA file is scanned with ``motif.pwm_scan`` (at most 100 matches
    reported per sequence). All reported values are pooled, handed to
    ``ks_pvalue`` and drawn as a histogram in ``outfile``.

    Args:
        fastafile: Path given to ``Fasta``.
        motif: Motif object; must provide ``pwm_scan``, ``id`` and ``len()``.
        width: Sequence width; sets the histogram range and, minus the
            motif length, the second argument of ``ks_pvalue``.
        outfile: Output target handed to ``plot_histogram``.
        cutoff: Score cutoff forwarded to ``pwm_scan``.

    Returns:
        ``(motif.id, p)``; ``p`` is ``1.0`` and nothing is plotted when the
        scan result is empty.
    """
    from gimmemotifs.fasta import Fasta
    from gimmemotifs.utils import ks_pvalue, plot_histogram
    from numpy import array

    NR_HIST_MATCHES = 100

    hits_by_seq = motif.pwm_scan(
        Fasta(fastafile), cutoff=cutoff, nreport=NR_HIST_MATCHES
    )
    if not len(hits_by_seq) > 0:
        return motif.id, 1.0

    # Pool the per-sequence match lists into a single flat array.
    positions = array([hit for hits in hits_by_seq.values() for hit in hits])
    p = ks_pvalue(positions, width - len(motif))

    # Shift the values so that 0 is the sequence centre; the bounds keep the
    # original ``/`` expressions exactly as written.
    lower_bound = -width / 2
    upper_bound = width / 2
    centered = positions - upper_bound + len(motif) / 2
    plot_histogram(
        centered,
        outfile,
        xrange=(lower_bound, upper_bound),
        breaks=21,
        title="%s (p=%0.2e)" % (motif.id, p),
        xlabel="Position",
    )
    return motif.id, p
def motif_localization(fastafile, motif, width, outfile, cutoff=0.9):
    """Plot where a motif matches within the sequences and test the spread.

    The FASTA file is scanned with ``motif.pwm_scan`` (at most 100 matches
    reported per sequence). All reported values are pooled, handed to
    ``ks_pvalue`` and drawn as a histogram in ``outfile``.

    Args:
        fastafile: Path given to ``Fasta``.
        motif: Motif object; must provide ``pwm_scan``, ``id`` and ``len()``.
        width: Sequence width; sets the histogram range and, minus the
            motif length, the second argument of ``ks_pvalue``.
        outfile: Output target handed to ``plot_histogram``.
        cutoff: Score cutoff forwarded to ``pwm_scan``. Defaults to 0.9,
            the value that used to be hard-coded, so existing
            four-argument calls behave as before.

    Returns:
        ``(motif.id, p)``; ``p`` is ``1.0`` and nothing is plotted when the
        scan result is empty.
    """
    NR_HIST_MATCHES = 100
    from gimmemotifs.utils import plot_histogram, ks_pvalue
    from gimmemotifs.fasta import Fasta
    from numpy import array

    matches = motif.pwm_scan(Fasta(fastafile), cutoff=cutoff, nreport=NR_HIST_MATCHES)
    if len(matches) > 0:
        # Pool the per-sequence match lists into a single flat array.
        ar = []
        for a in matches.values():
            ar += a
        matches = array(ar)
        p = ks_pvalue(matches, width - len(motif))
        # Shift the values so that 0 corresponds to the sequence centre.
        plot_histogram(
            matches - width / 2 + len(motif) / 2,
            outfile,
            xrange=(-width / 2, width / 2),
            breaks=21,
            title="%s (p=%0.2e)" % (motif.id, p),
            xlabel="Position",
        )
        return motif.id, p
    else:
        return motif.id, 1.0
def motif_localization(fastafile, motif, width, outfile, cutoff, bins=20):
    """Scan sequences with the external ``pwmscan.py`` and plot match positions.

    The motif is written to a temporary file via ``motif.to_pwm()``, then
    ``pwmscan.py -i <fastafile> -p <pwm file> -c <cutoff>`` is run with its
    standard output captured in a second temporary file. That file is
    passed to ``make_gff_histogram`` and ``ks_pvalue``.

    Args:
        fastafile: Path of the FASTA file to scan.
        motif: Motif object; must provide ``to_pwm()``, ``id`` and ``len()``.
        width: Sequence width, forwarded to ``make_gff_histogram``; minus
            the motif length it is the second argument of ``ks_pvalue``.
        outfile: Output target handed to ``make_gff_histogram``.
        cutoff: Value passed to ``pwmscan.py`` as ``-c``.
        bins: Number of bins forwarded to ``make_gff_histogram``.

    Returns:
        ``(motif.id, p)`` where ``p`` is the result of ``ks_pvalue``.
    """
    from tempfile import NamedTemporaryFile
    from subprocess import Popen
    from gimmemotifs.utils import make_gff_histogram, ks_pvalue

    pwm_file = NamedTemporaryFile()
    gff_file = NamedTemporaryFile()
    try:
        pwm_file.write(motif.to_pwm())
        # The scanner opens the file by name, so the data must be on disk.
        pwm_file.flush()

        # Argument list instead of an interpolated shell string: paths with
        # spaces or shell metacharacters are passed through literally.
        cmd = [
            "pwmscan.py",
            "-i", str(fastafile),
            "-p", pwm_file.name,
            "-c", str(cutoff),
        ]
        # stdout goes straight into the temp file (replaces "> file").
        # The exit status is not checked, as before.
        proc = Popen(cmd, stdout=gff_file)
        proc.communicate()

        make_gff_histogram(gff_file.name, outfile, width, motif.id, bins)
        return motif.id, ks_pvalue(gff_file.name, width - len(motif))
    finally:
        # Closing removes the temporary files deterministically instead of
        # waiting for garbage collection.
        gff_file.close()
        pwm_file.close()