def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change
    as a score.

    Each row of the input table is written as one line with four
    tab-separated columns: ``contig``, ``start``, ``end`` and the
    ``l2fold`` value formatted with ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Must provide the columns
        ``contig``, ``start``, ``end`` and ``l2fold``.
    outfile : string
        Output filename in :term:`bed` format.

    Raises
    ------
    ValueError
        If an ``l2fold`` value cannot be converted to float.
    '''
    # The output file is opened first (as before) so that it is created
    # even when the input cannot be opened. Both handles are released
    # by the context managers, also when a row fails to parse.
    with IOTools.open_file(outfile, "w") as outf:
        with IOTools.open_file(infile) as inf:
            for line in IOTools.iterate(inf):
                score = "%6.4f" % float(line.l2fold)
                outf.write(
                    "\t".join((line.contig, line.start, line.end, score))
                    + "\n")
def read():
    '''iterate over the differential-testing table on ``options.stdin``.

    ``options``, ``DATA`` and ``IOTools`` are taken from the
    surrounding scope. The ``test_id`` of every row is parsed with the
    regular expression ``options.pattern_window``, which has to
    produce exactly three groups: contig, start and end. Start and end
    are converted to integers, the means, standard deviations,
    p-/q-values and fold changes to floats and ``significant`` to an
    integer.

    NOTE(review): a previous comment here mentioned filtering
    DESeq/EdgeR messages from the top of the input. No filtering is
    done in this function - presumably it happens upstream; confirm.

    Yields
    ------
    DATA
        One record per input row, built with ``DATA._make`` from 17
        values. The last value is always 0 (its meaning is defined by
        ``DATA``, which is not visible here).
    '''
    window_regex = re.compile(options.pattern_window)

    for row in IOTools.iterate(options.stdin):
        # A test_id that does not match the pattern makes ``match``
        # return None, so ``.groups()`` raises AttributeError.
        contig, start, end = window_regex.match(row.test_id).groups()

        fields = (
            row.test_id,
            contig,
            int(start),
            int(end),
            row.treatment_name,
            float(row.treatment_mean),
            float(row.treatment_std),
            row.control_name,
            float(row.control_mean),
            float(row.control_std),
            float(row.pvalue),
            float(row.qvalue),
            float(row.l2fold),
            float(row.fold),
            int(row.significant),
            row.status,
            0,
        )
        yield DATA._make(fields)
def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips").

    Rows of all input tables are tallied per
    ``(treatment_name, control_name)`` pair. Every row is counted as
    ``tested`` and under its ``status`` value. Only rows with status
    ``OK`` contribute to the remaining counters:

    * ``up`` / ``down``: l2fold > 0 / l2fold < 0
    * ``l2fold_up`` / ``l2fold_down``: l2fold > 1 / l2fold < -1
    * ``significant``, ``significant_up``, ``significant_down``,
      ``significant_l2fold_up``, ``significant_l2fold_down`` and
      ``fold2``: the same tests restricted to rows whose
      ``significant`` column equals ``"1"`` (``fold2`` counts
      significant rows with abs(l2fold) > 1).

    The header line is written once, after the first input file has
    been read, and contains only the counter and status names seen up
    to that point. After every input file one line is written for each
    pair collected so far, with cumulative counts. No plots are
    created by this function.

    NOTE(review): with several input files, pairs from earlier files
    are therefore repeated and counters/status values that first
    appear in later files have no column. This mirrors the existing
    output format - confirm whether one row per pair is intended.

    Arguments
    ---------
    infiles : list
        List of tabs with DMR output. Each table needs the columns
        ``treatment_name``, ``control_name``, ``status``, ``l2fold``
        and ``significant``.
    outfile : string
        Output filename. Tab separated file summarizing the counts.
    method : string
        Method name. Written verbatim into the first column.
    fdr_threshold : float
        FDR threshold to apply. Currently unused.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    header1, header2 = [], []
    is_first = True

    # Context managers make sure the summary is flushed and that all
    # handles are released, also if a table contains malformed rows.
    with IOTools.open_file(outfile, "w") as outf:
        for infile in infiles:
            with IOTools.open_file(infile) as inf:
                for line in IOTools.iterate(inf):
                    key = (line.treatment_name, line.control_name)
                    r, s = results[key], status[key]
                    r["tested"] += 1
                    s[line.status] += 1

                    # l2fold is only parsed for rows that were tested
                    # successfully; other rows are not converted.
                    if line.status != "OK":
                        continue

                    l2fold = float(line.l2fold)
                    up, down = l2fold > 0, l2fold < 0
                    fold2up, fold2down = l2fold > 1, l2fold < -1

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if line.significant == "1":
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2up or fold2down:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1

            if is_first:
                is_first = False
                # The column layout is fixed by what the first file
                # contained.
                header1 = sorted(
                    {name for counts in results.values() for name in counts})
                header2 = sorted(
                    {name for counts in status.values() for name in counts})
                outf.write("\t".join(
                    ["method", "treatment", "control"] +
                    header1 + header2) + "\n")

            for (treatment, control), r in results.items():
                s = status[(treatment, control)]
                # Looking up a missing counter in a defaultdict(int)
                # yields 0, so absent columns are reported as 0.
                outf.write(
                    "%s\t%s\t%s\t" % (method, treatment, control) +
                    "\t".join([str(r[x]) for x in header1]) + "\t" +
                    "\t".join([str(s[x]) for x in header2]) + "\n")