def read():
    '''iterate over the rows on ``options.stdin`` as ``DATA`` records.

    The window coordinates are parsed from the ``test_id`` of each row
    using the regular expression in ``options.pattern_window``. The
    expression has to supply exactly three groups: contig, start and
    end. Start, end and ``significant`` are converted to int, the mean,
    std, p-value, q-value and fold columns to float. The last field of
    every record is set to 0.

    Records are built with ``DATA._make`` and yielded one at a time.
    '''
    window_pattern = re.compile(options.pattern_window)

    # NOTE(review): the original comment at this point says that
    # DESeq/EdgeR messages at the top of the input are filtered. No
    # filtering is visible in this function - presumably
    # IOTools.iterate takes care of it; confirm.
    for row in IOTools.iterate(options.stdin):
        contig, first, last = window_pattern.match(row.test_id).groups()
        record = (
            row.test_id,
            contig,
            int(first),
            int(last),
            row.treatment_name,
            float(row.treatment_mean),
            float(row.treatment_std),
            row.control_name,
            float(row.control_mean),
            float(row.control_std),
            float(row.pvalue),
            float(row.qvalue),
            float(row.l2fold),
            float(row.fold),
            int(row.significant),
            row.status,
            0,
        )
        yield DATA._make(record)
def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change as a score.

    Every input row is written as four tab-separated columns: contig,
    start, end and the l2fold value formatted with ``%6.4f``.

    Arguments
    ---------
    infile : string
        Filename of the table to read. Rows are read with
        ``IOTools.iterate`` and must provide the fields ``contig``,
        ``start``, ``end`` and ``l2fold``.
    outfile : string
        Filename of the output file.
    '''
    # The output is opened before the input, so it is created even if
    # opening the input fails. Both handles are closed on every exit
    # path, including when a row cannot be formatted.
    with IOTools.openFile(outfile, "w") as outf:
        with IOTools.openFile(infile) as inf:
            for row in IOTools.iterate(inf):
                score = "%6.4f" % float(row.l2fold)
                outf.write(
                    "\t".join((row.contig, row.start, row.end, score)) +
                    "\n")
def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change as a score.

    Every input row is written as four tab-separated columns: contig,
    start, end and the l2fold value formatted with ``%6.4f``.

    Arguments
    ---------
    infile : string
        Filename of the table to read. Rows are read with
        ``IOTools.iterate`` and must provide the fields ``contig``,
        ``start``, ``end`` and ``l2fold``.
    outfile : string
        Filename of the output file.
    '''
    # The output is opened before the input, so it is created even if
    # opening the input fails. Both handles are closed on every exit
    # path, including when a row cannot be formatted.
    with IOTools.openFile(outfile, "w") as outf:
        with IOTools.openFile(infile) as inf:
            for row in IOTools.iterate(inf):
                score = "%6.4f" % float(row.l2fold)
                outf.write(
                    "\t".join((row.contig, row.start, row.end, score)) +
                    "\n")
def buildDMRStats(infile, outfile, method):
    '''build dmr summary statistics.

    Rows of `infile` are grouped by (treatment_name, control_name). For
    each group the number of tested rows, the number of rows per
    status, and the number of rows that are up/down (l2fold > 0 / < 0)
    or more than 2-fold up/down (l2fold > 1 / < -1) are counted. The
    ``significant*`` and ``fold2`` counters only include rows whose
    ``significant`` field equals "1".

    The output is a tab-separated table with one row per group. The
    columns are ``method``, ``treatment`` and ``control`` followed by
    the sorted counter names and the sorted status names.

    Arguments
    ---------
    infile : string
        Filename of the table to read. Rows are read with
        ``IOTools.iterate`` and must provide the fields
        ``treatment_name``, ``control_name``, ``status``,
        ``significant`` and ``l2fold``.
    outfile : string
        Filename of the output table.
    method : string
        Method name, written into the first column of every row.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    # close the input once it has been read
    with IOTools.openFile(infile) as inf:
        for line in IOTools.iterate(inf):
            key = (line.treatment_name, line.control_name)
            r, s = results[key], status[key]
            r["tested"] += 1
            s[line.status] += 1

            is_significant = line.significant == "1"
            # parse the fold change once per row
            l2fold = float(line.l2fold)
            up = l2fold > 0
            down = l2fold < 0
            fold2up = l2fold > 1
            fold2down = l2fold < -1
            fold2 = fold2up or fold2down

            if up:
                r["up"] += 1
            if down:
                r["down"] += 1
            if fold2up:
                r["l2fold_up"] += 1
            if fold2down:
                r["l2fold_down"] += 1

            if is_significant:
                r["significant"] += 1
                if up:
                    r["significant_up"] += 1
                if down:
                    r["significant_down"] += 1
                if fold2:
                    r["fold2"] += 1
                if fold2up:
                    r["significant_l2fold_up"] += 1
                if fold2down:
                    r["significant_l2fold_down"] += 1

    # columns are the union of all counter/status names over all groups
    header1, header2 = set(), set()
    for r in results.values():
        header1.update(r.keys())
    for s in status.values():
        header2.update(s.keys())

    header = ["method", "treatment", "control"]
    header1 = sorted(header1)
    header2 = sorted(header2)

    # The output is only opened after the input has been read
    # successfully and is closed (and thereby flushed) on exit.
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("\t".join(header + header1 + header2) + "\n")

        for key, r in results.items():
            treatment, control = key
            s = status[key]
            outf.write("%s\t%s\t%s\t" % (method, treatment, control))
            # counters missing for a group default to 0
            outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
            outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
def buildDMRStats(infile, outfile, method):
    '''build dmr summary statistics.

    Rows of `infile` are grouped by (treatment_name, control_name). For
    each group the number of tested rows, the number of rows per
    status, and the number of rows that are up/down (l2fold > 0 / < 0)
    or more than 2-fold up/down (l2fold > 1 / < -1) are counted. The
    ``significant*`` and ``fold2`` counters only include rows whose
    ``significant`` field equals "1".

    The output is a tab-separated table with one row per group. The
    columns are ``method``, ``treatment`` and ``control`` followed by
    the sorted counter names and the sorted status names.

    Arguments
    ---------
    infile : string
        Filename of the table to read. Rows are read with
        ``IOTools.iterate`` and must provide the fields
        ``treatment_name``, ``control_name``, ``status``,
        ``significant`` and ``l2fold``.
    outfile : string
        Filename of the output table.
    method : string
        Method name, written into the first column of every row.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    # close the input once it has been read
    with IOTools.openFile(infile) as inf:
        for line in IOTools.iterate(inf):
            key = (line.treatment_name, line.control_name)
            r, s = results[key], status[key]
            r["tested"] += 1
            s[line.status] += 1

            is_significant = line.significant == "1"
            # parse the fold change once per row
            l2fold = float(line.l2fold)
            up = l2fold > 0
            down = l2fold < 0
            fold2up = l2fold > 1
            fold2down = l2fold < -1
            fold2 = fold2up or fold2down

            if up:
                r["up"] += 1
            if down:
                r["down"] += 1
            if fold2up:
                r["l2fold_up"] += 1
            if fold2down:
                r["l2fold_down"] += 1

            if is_significant:
                r["significant"] += 1
                if up:
                    r["significant_up"] += 1
                if down:
                    r["significant_down"] += 1
                if fold2:
                    r["fold2"] += 1
                if fold2up:
                    r["significant_l2fold_up"] += 1
                if fold2down:
                    r["significant_l2fold_down"] += 1

    # columns are the union of all counter/status names over all groups
    header1, header2 = set(), set()
    for r in results.values():
        header1.update(r.keys())
    for s in status.values():
        header2.update(s.keys())

    header = ["method", "treatment", "control"]
    header1 = sorted(header1)
    header2 = sorted(header2)

    # The output is only opened after the input has been read
    # successfully and is closed (and thereby flushed) on exit.
    with IOTools.openFile(outfile, "w") as outf:
        outf.write("\t".join(header + header1 + header2) + "\n")

        for key, r in results.items():
            treatment, control = key
            s = status[key]
            outf.write("%s\t%s\t%s\t" % (method, treatment, control))
            # counters missing for a group default to 0
            outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
            outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change as a score.

    Every input row is written as four tab-separated columns: contig,
    start, end and the l2fold value formatted with ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Rows are read with
        ``IOTools.iterate`` and must provide the fields ``contig``,
        ``start``, ``end`` and ``l2fold``.
    outfile : string
        Output filename in :term:`bed` format.
    '''
    # The output is opened before the input, so it is created even if
    # opening the input fails. Both handles are closed on every exit
    # path, including when a row cannot be formatted.
    with IOTools.openFile(outfile, "w") as outf:
        with IOTools.openFile(infile) as inf:
            for row in IOTools.iterate(inf):
                score = "%6.4f" % float(row.l2fold)
                outf.write(
                    "\t".join((row.contig, row.start, row.end, score)) +
                    "\n")
def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change as a score.

    Every input row is written as four tab-separated columns: contig,
    start, end and the l2fold value formatted with ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Rows are read with
        ``IOTools.iterate`` and must provide the fields ``contig``,
        ``start``, ``end`` and ``l2fold``.
    outfile : string
        Output filename in :term:`bed` format.
    '''
    # The output is opened before the input, so it is created even if
    # opening the input fails. Both handles are closed on every exit
    # path, including when a row cannot be formatted.
    with IOTools.openFile(outfile, "w") as outf:
        with IOTools.openFile(infile) as inf:
            for row in IOTools.iterate(inf):
                score = "%6.4f" % float(row.l2fold)
                outf.write(
                    "\t".join((row.contig, row.start, row.end, score)) +
                    "\n")
def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    This method counts the number of up/down, 2fold up/down,
    etc. genes in output from (:mod:`scripts/runExpression`).

    Rows are grouped by (treatment_name, control_name) and the
    counters are accumulated over all input files. Every row counts
    towards ``tested`` and towards its status; rows with a status other
    than "OK" are not examined any further. The ``significant*`` and
    ``fold2`` counters only include rows whose ``significant`` field
    equals "1".

    A single table is written once all input files have been read: one
    row per group, with the columns ``method``, ``treatment`` and
    ``control`` followed by the sorted counter names and the sorted
    status names found in any of the inputs. If `infiles` is empty, the
    output file is created but left empty.

    Arguments
    ---------
    infiles : list
        List of tabs with DMR output
    outfile : string
        Output filename. Tab separated file summarizing
        the counts per treatment/control pair.
    method : string
        Method name
    fdr_threshold : float
        FDR threshold to apply. Currently unused.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    # deseq/edger
    def f_significant(x):
        return x.significant == "1"

    def f_l2fold(x):
        return float(x.l2fold)

    def f_key(x):
        return (x.treatment_name, x.control_name)

    def f_status(x):
        return x.status

    # The output is opened before any input is read and closed (and
    # thereby flushed) on every exit path.
    with IOTools.openFile(outfile, "w") as outf:

        has_input = False
        for infile in infiles:
            has_input = True

            with IOTools.openFile(infile) as inf:
                for line in IOTools.iterate(inf):
                    key = f_key(line)

                    r, s = results[key], status[key]
                    r["tested"] += 1
                    ss = f_status(line)
                    s[ss] += 1

                    if ss != "OK":
                        continue

                    is_significant = f_significant(line)
                    # parsed only for rows with status OK
                    l2fold = f_l2fold(line)
                    up = l2fold > 0
                    down = l2fold < 0
                    fold2up = l2fold > 1
                    fold2down = l2fold < -1
                    fold2 = fold2up or fold2down

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if is_significant:
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1

        if has_input:
            # The counters accumulate across input files, so the table
            # is written once at the end. The header covers the
            # columns of all inputs and each group appears exactly
            # once with its final counts.
            header1, header2 = set(), set()
            for r in results.values():
                header1.update(r.keys())
            for s in status.values():
                header2.update(s.keys())

            header = ["method", "treatment", "control"]
            header1 = sorted(header1)
            header2 = sorted(header2)

            outf.write("\t".join(header + header1 + header2) + "\n")

            for key, r in results.items():
                treatment, control = key
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                # counters missing for a group default to 0
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    Rows are grouped by (treatment_name, control_name) and the
    counters are accumulated over all input files. Every row counts
    towards ``tested`` and towards its status; rows with a status other
    than "OK" are not examined any further. The ``significant*`` and
    ``fold2`` counters only include rows whose ``significant`` field
    equals "1".

    A single table is written once all input files have been read: one
    row per group, with the columns ``method``, ``treatment`` and
    ``control`` followed by the sorted counter names and the sorted
    status names found in any of the inputs. If `infiles` is empty, the
    output file is created but left empty.

    Arguments
    ---------
    infiles : list
        Filenames of the tables to summarize.
    outfile : string
        Filename of the tab-separated output table.
    method : string
        Method name, written into the first column of every row.
    fdr_threshold : float
        Not used by this function.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    # deseq/edger
    def f_significant(x):
        return x.significant == "1"

    def f_l2fold(x):
        return float(x.l2fold)

    def f_key(x):
        return (x.treatment_name, x.control_name)

    def f_status(x):
        return x.status

    # The output is opened before any input is read and closed (and
    # thereby flushed) on every exit path.
    with IOTools.openFile(outfile, "w") as outf:

        has_input = False
        for infile in infiles:
            has_input = True

            with IOTools.openFile(infile) as inf:
                for line in IOTools.iterate(inf):
                    key = f_key(line)

                    r, s = results[key], status[key]
                    r["tested"] += 1
                    ss = f_status(line)
                    s[ss] += 1

                    if ss != "OK":
                        continue

                    is_significant = f_significant(line)
                    # parsed only for rows with status OK
                    l2fold = f_l2fold(line)
                    up = l2fold > 0
                    down = l2fold < 0
                    fold2up = l2fold > 1
                    fold2down = l2fold < -1
                    fold2 = fold2up or fold2down

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if is_significant:
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1

        if has_input:
            # The counters accumulate across input files, so the table
            # is written once at the end. The header covers the
            # columns of all inputs and each group appears exactly
            # once with its final counts.
            header1, header2 = set(), set()
            for r in results.values():
                header1.update(r.keys())
            for s in status.values():
                header2.update(s.keys())

            header = ["method", "treatment", "control"]
            header1 = sorted(header1)
            header2 = sorted(header2)

            outf.write("\t".join(header + header1 + header2) + "\n")

            for key, r in results.items():
                treatment, control = key
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                # counters missing for a group default to 0
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")