Esempio n. 1
0
    def stats(self, fg_fa, bg_fa, logger=None):
        """Compute enrichment and localization statistics for this motif.

        Both inputs are scanned with ``self.pwm_scan_all(..., cutoff=0.0,
        nreport=1, scan_rc=True)``.  For every scanned sequence the second
        field of its first reported match is collected (presumably the match
        score -- confirm against ``pwm_scan_all``); sequences without any
        match contribute the sentinel value -100.

        Args:
            fg_fa: Foreground input, passed unchanged to ``pwm_scan_all``.
            bg_fa: Background input, passed unchanged to ``pwm_scan_all``.
            logger: Optional object with an ``error(msg)`` method.  When it
                is missing or falsy the error message is printed instead.

        Returns:
            dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
            ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
            ``cutoff_fdr``, ``ks`` and ``ks_sig``.  ``ks_sig`` is
            ``-log(ks) / log(10)``, or the string ``"Inf"`` when ``ks`` is
            not positive.

        Raises:
            Exception: Any error raised while computing the statistics is
                reported (through ``logger`` or ``print``) and re-raised
                unchanged.
        """
        try:
            stats = {}
            fg_result = self.pwm_scan_all(
                fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
            bg_result = self.pwm_scan_all(
                bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

            # One value per sequence; -100 stands in for "no match found".
            pos = [m[0][1] if len(m) else -100 for m in fg_result.values()]
            neg = [m[0][1] if len(m) else -100 for m in bg_result.values()]

            stats["mncp"] = MNCP(pos, neg)
            stats["roc_auc"] = ROC_AUC(pos, neg)
            max_enr, score_at_max_enr = max_enrichment(pos, neg)
            stats["maxenr"] = max_enr
            stats["scoreatmaxenr"] = score_at_max_enr
            stats["fraction"] = fraction_fdr(pos, neg)
            stats["score_fdr"] = score_at_fdr(pos, neg)
            stats["enr_fdr"] = enr_at_fdr(pos, neg)

            # Rescale the FDR score to the [min, max] score range of the PWM.
            offset = stats["score_fdr"] - self.pwm_min_score()
            score_range = self.pwm_max_score() - self.pwm_min_score()
            stats["cutoff_fdr"] = offset / score_range

            # First field of the first match of every foreground sequence
            # that has a match.  max() raises ValueError when there is none;
            # that error is reported and re-raised by the handler below.
            first_fields = [m[0][0] for m in fg_result.values() if len(m)]
            p = ks_pvalue(first_fields, max(first_fields))
            stats["ks"] = p
            # log() is undefined for p <= 0, so a string marker is stored.
            stats["ks_sig"] = -log(p) / log(10) if p > 0 else "Inf"

            return stats
        except Exception as e:
            msg = "Error calculating stats of {0}, error {1}".format(
                self.id, str(e))
            if logger:
                logger.error(msg)
            else:
                # Call form works on both Python 2 and Python 3.
                print(msg)
            raise
Esempio n. 2
0
    def stats(self, fg_fa, bg_fa, logger=None):
        """Compute enrichment and localization statistics for this motif.

        Both inputs are scanned with ``self.pwm_scan_all(..., cutoff=0.0,
        nreport=1, scan_rc=True)``.  For every scanned sequence the second
        field of its first reported match is collected (presumably the match
        score -- confirm against ``pwm_scan_all``); sequences without any
        match contribute the sentinel value -100.

        Args:
            fg_fa: Foreground input, passed unchanged to ``pwm_scan_all``.
            bg_fa: Background input, passed unchanged to ``pwm_scan_all``.
            logger: Optional object with an ``error(msg)`` method.  When it
                is missing or falsy the error message is printed instead.

        Returns:
            dict with the keys ``mncp``, ``roc_auc``, ``maxenr``,
            ``scoreatmaxenr``, ``fraction``, ``score_fdr``, ``enr_fdr``,
            ``cutoff_fdr``, ``ks`` and ``ks_sig``.  ``ks_sig`` is
            ``-log(ks) / log(10)``, or the string ``"Inf"`` when ``ks`` is
            not positive.

        Raises:
            Exception: Any error raised while computing the statistics is
                reported (through ``logger`` or ``print``) and re-raised
                unchanged.
        """
        from gimmemotifs.rocmetrics import MNCP, ROC_AUC, max_enrichment, fraction_fdr, score_at_fdr, enr_at_fdr
        from gimmemotifs.utils import ks_pvalue
        from math import log

        try:
            stats = {}
            fg_result = self.pwm_scan_all(fg_fa, cutoff=0.0, nreport=1, scan_rc=True)
            bg_result = self.pwm_scan_all(bg_fa, cutoff=0.0, nreport=1, scan_rc=True)

            # One value per sequence; -100 stands in for "no match found".
            pos = [m[0][1] if len(m) else -100 for m in fg_result.values()]
            neg = [m[0][1] if len(m) else -100 for m in bg_result.values()]

            stats["mncp"] = MNCP(pos, neg)
            stats["roc_auc"] = ROC_AUC(pos, neg)
            max_enr, score_at_max_enr = max_enrichment(pos, neg)
            stats["maxenr"] = max_enr
            stats["scoreatmaxenr"] = score_at_max_enr
            stats["fraction"] = fraction_fdr(pos, neg)
            stats["score_fdr"] = score_at_fdr(pos, neg)
            stats["enr_fdr"] = enr_at_fdr(pos, neg)

            # Rescale the FDR score to the [min, max] score range of the PWM.
            offset = stats["score_fdr"] - self.pwm_min_score()
            score_range = self.pwm_max_score() - self.pwm_min_score()
            stats["cutoff_fdr"] = offset / score_range

            # First field of the first match of every foreground sequence
            # that has a match.  max() raises ValueError when there is none;
            # that error is reported and re-raised by the handler below.
            first_fields = [m[0][0] for m in fg_result.values() if len(m)]
            p = ks_pvalue(first_fields, max(first_fields))
            stats["ks"] = p
            # log() is undefined for p <= 0, so a string marker is stored.
            stats["ks_sig"] = -log(p) / log(10) if p > 0 else "Inf"

            return stats
        except Exception as e:
            # Report first, then re-raise: a bare ``raise`` placed before the
            # reporting code would leave the message unreachable.
            msg = "Error calculating stats of {0}, error {1}".format(self.id, str(e))
            if logger:
                logger.error(msg)
            else:
                # Call form works on both Python 2 and Python 3.
                print(msg)
            raise
def motif_localization(fastafile, motif, width, outfile, cutoff=0.9):
    """Plot a histogram of motif match values and return a KS p-value.

    The sequences in ``fastafile`` are scanned with ``motif.pwm_scan`` using
    ``cutoff`` and at most 100 reported matches (``nreport=100``).  All
    reported values are pooled, handed to ``ks_pvalue`` together with
    ``width - len(motif)``, shifted by ``- width / 2 + len(motif) / 2`` and
    drawn with ``plot_histogram`` into ``outfile``.

    Args:
        fastafile: Path handed to ``Fasta``.
        motif: Object providing ``pwm_scan``, ``id`` and ``__len__``.
        width: Used for the ``ks_pvalue`` bound and the histogram x-range
            (presumably the sequence width -- confirm with callers).
        outfile: Output target passed to ``plot_histogram``.
        cutoff: Cutoff forwarded to ``motif.pwm_scan``.

    Returns:
        Tuple ``(motif.id, p)``; ``p`` is 1.0 (and nothing is plotted) when
        the scan result is empty.
    """
    from gimmemotifs.utils import plot_histogram, ks_pvalue
    from gimmemotifs.fasta import Fasta
    from numpy import array

    max_reported = 100

    hits = motif.pwm_scan(Fasta(fastafile), cutoff=cutoff, nreport=max_reported)
    if len(hits) == 0:
        return motif.id, 1.0

    # Pool the per-sequence match lists into one flat list.
    pooled = []
    for per_sequence in hits.values():
        pooled.extend(per_sequence)
    values = array(pooled)

    p = ks_pvalue(values, width - len(motif))

    shifted = values - width / 2 + len(motif) / 2
    x_limits = (-width / 2, width / 2)
    heading = "%s (p=%0.2e)" % (motif.id, p)
    plot_histogram(
        shifted,
        outfile,
        xrange=x_limits,
        breaks=21,
        title=heading,
        xlabel="Position",
    )
    return motif.id, p
Esempio n. 4
0
def motif_localization(fastafile, motif, width, outfile, cutoff=0.9):
    """Plot a histogram of motif match values and return a KS p-value.

    The sequences in ``fastafile`` are scanned with ``motif.pwm_scan`` and at
    most 100 reported matches (``nreport=100``).  All reported values are
    pooled, handed to ``ks_pvalue`` together with ``width - len(motif)``,
    shifted by ``- width / 2 + len(motif) / 2`` and drawn with
    ``plot_histogram`` into ``outfile``.

    Args:
        fastafile: Path handed to ``Fasta``.
        motif: Object providing ``pwm_scan``, ``id`` and ``__len__``.
        width: Used for the ``ks_pvalue`` bound and the histogram x-range
            (presumably the sequence width -- confirm with callers).
        outfile: Output target passed to ``plot_histogram``.
        cutoff: Cutoff forwarded to ``motif.pwm_scan``.  Defaults to 0.9,
            the value that used to be hard-coded, so existing four-argument
            calls behave as before.

    Returns:
        Tuple ``(motif.id, p)``; ``p`` is 1.0 (and nothing is plotted) when
        the scan result is empty.
    """
    NR_HIST_MATCHES = 100
    from gimmemotifs.utils import plot_histogram, ks_pvalue
    from gimmemotifs.fasta import Fasta
    from numpy import array

    matches = motif.pwm_scan(Fasta(fastafile), cutoff=cutoff, nreport=NR_HIST_MATCHES)
    if len(matches) > 0:
        # Pool the per-sequence match lists into one flat list.
        ar = []
        for a in matches.values():
            ar += a
        matches = array(ar)
        p = ks_pvalue(matches, width - len(motif))
        plot_histogram(
            matches - width / 2 + len(motif) / 2,
            outfile,
            xrange=(-width / 2, width / 2),
            breaks=21,
            title="%s (p=%0.2e)" % (motif.id, p),
            xlabel="Position",
        )
        return motif.id, p
    else:
        return motif.id, 1.0
def motif_localization(fastafile, motif, width, outfile, cutoff, bins=20):
    """Scan a FASTA file with ``pwmscan.py`` and summarize match positions.

    The motif is written (``motif.to_pwm()``) to a temporary file, the
    external ``pwmscan.py`` program is run on ``fastafile`` with that file
    and ``cutoff``, and its standard output is captured in a second
    temporary file.  That output file is then passed to
    ``make_gff_histogram`` (writing ``outfile``) and to ``ks_pvalue``.

    Args:
        fastafile: Path given to ``pwmscan.py -i``.
        motif: Object providing ``to_pwm()``, ``id`` and ``__len__``.
        width: Passed to ``make_gff_histogram``; ``width - len(motif)`` is
            passed to ``ks_pvalue``.
        outfile: Output target passed to ``make_gff_histogram``.
        cutoff: Value given to ``pwmscan.py -c``.
        bins: Passed to ``make_gff_histogram``.

    Returns:
        Tuple ``(motif.id, p)`` where ``p`` is the result of ``ks_pvalue``.
    """
    from tempfile import NamedTemporaryFile
    from subprocess import Popen
    from gimmemotifs.utils import make_gff_histogram, ks_pvalue

    # Context managers guarantee both temporary files are closed (and thus
    # removed) even if the scan or the plotting raises.
    with NamedTemporaryFile() as temp, NamedTemporaryFile() as tempgff:
        temp.write(motif.to_pwm())
        temp.flush()

        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through verbatim, and
        # stdout goes straight into the temporary GFF file.
        cmd = [
            "pwmscan.py",
            "-i", str(fastafile),
            "-p", temp.name,
            "-c", str(cutoff),
        ]
        p = Popen(cmd, stdout=tempgff)
        # The exit status is deliberately not checked, as before.
        p.communicate()

        make_gff_histogram(tempgff.name, outfile, width, motif.id, bins)
        return motif.id, ks_pvalue(tempgff.name, width - len(motif))