def pipeline(col_num, step, dist, acf_dist, prefix, threshold, seed,
             bed_files, mlog=True, region_filter_p=1, region_filter_n=None,
             genome_control=False, db=None, use_fdr=True):
    """
    Run every analysis step on `bed_files`, writing one output file per step.

    All outputs are named from `prefix` (trailing dots stripped) and each
    one is announced on stderr:

      <prefix>.args.txt          command-line arguments, date and version
      <prefix>.acf.txt           autocorrelation values (acf.write_acf)
      <prefix>.slk.bed.gz        rows produced by slk.adjust_pvals
      <prefix>.slk.gc.bed.gz     only when `genome_control` is true
      <prefix>.fdr.bed.gz        slk rows plus the value from fdr.fdr
      <prefix>.regions.bed.gz    output of peaks.peaks
      <prefix>.regions-p.bed.gz  per-region p-values from region_p.region_p
      <prefix>.regions-t.bed     rows from filter.filter that pass the filters
      <prefix>.manhattan.png     only when cpv.manhattan can be imported
      <prefix>.anno.<db>.bed     only when `db` is given

    Parameters:
      col_num          p-value column, forwarded to stepsize, acf, slk and
                       filter (as `p_col_name`)
      step             lag step size; when None it is
                       min(acf_dist, stepsize.stepsize(bed_files, col_num))
      dist             forwarded to peaks.peaks
      acf_dist         upper bound of the lags given to acf.acf
      threshold, seed  forwarded to peaks.peaks
      bed_files        input files; bed_files[0] is also given to filter
      mlog             forwarded to acf.acf
      region_filter_p  rows whose column 6 exceeds this are dropped
      region_filter_n  rows whose column 4 is below this are dropped
                       (None means 0)
      genome_control   also write the genome-control adjusted slk file and
                       use it as the input to the FDR step
      db               genome name given to cruzdb.Genome for annotation
      use_fdr          call peaks on the last column of the fdr file if
                       true, otherwise on the second-to-last column

    Returns None. Calls sys.exit() when no regions are found.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import datetime
    import operator

    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        print("calculated stepsize as: %i" % step, file=sys.stderr)

    lags = list(range(1, acf_dist, step))
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first lag whose
    # correlation drops below 0.04. That lag is itself kept only while
    # fewer than three lags have been collected.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2:
            break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals):
            break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print(" ".join(sys.argv[1:]) + "\n", file=fh)
        print("date: %s" % datetime.datetime.today(), file=fh)
        from .__init__ import __version__
        print("version:", __version__, file=fh)

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        # status goes to stderr; it used to be printed into the ACF file.
        print("wrote: %s" % fh.name, file=sys.stderr)
    with open(prefix + ".acf.txt") as acf_fh:
        acf_text = acf_fh.read()
    print("ACF:\n", acf_text, file=sys.stderr)

    # array('f') keeps the per-row p-values compact.
    spvals, opvals = array.array('f'), array.array('f')
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for chrom, results in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fmt = chrom + "\t%i\t%i\t%.4g\t%.4g\n"
            for row in results:
                row = tuple(row)
                fhslk.write(fmt % row)
                opvals.append(row[-2])
                spvals.append(row[-1])

    print("# original lambda: %.2f" % genomic_control(opvals),
          file=sys.stderr)
    del opvals

    gc_lambda = genomic_control(spvals)
    print("wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda),
          file=sys.stderr)

    if genome_control:
        adj = genome_control_adjust(
            [d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
        # NOTE(review): adj has one entry per record yielded by bediter,
        # while this loop enumerates every line of the file, including the
        # '#chrom' header written above. If bediter skips that header the
        # two are offset by one -- confirm.
        with ts.nopen(prefix + ".slk.gc.bed.gz", "w") as fhslk:
            for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
                print("%s\t%.5g" % (line.rstrip("\r\n"), adj[i]),
                      file=fhslk)
        print("wrote: %s" % fhslk.name, file=sys.stderr)

    # fhslk.name is the genome-control file when one was written above.
    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print("wrote: %s" % fh.name, file=sys.stderr)

    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        # list() drains peaks.peaks, which is handed the output file `fh`.
        list(peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2,
                         threshold, seed, dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    print("wrote: %s (%i regions)" % (fregions, n_regions), file=sys.stderr)
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                prefix + ".slk.bed.gz", prefix + ".regions.bed.gz", -2,
                step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print("wrote: %s, (regions with corrected-p < 0.05: %i)"
              % (fh.name, N), file=sys.stderr)

    regions_bed = fh.name
    if region_filter_n is None:
        region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(filter.filter(bed_files[0], regions_bed,
                                               p_col_name=col_num)):
            if i == 0:
                # first row is treated as the header: mark it as a comment
                toks[0] = "#" + toks[0]
            else:
                # column 6 is compared as the region p-value and column 4
                # as the probe count (see the log message below).
                if float(toks[6]) > region_filter_p:
                    continue
                if int(toks[4]) < region_filter_n:
                    continue
                N += 1
            # rows belong in the output file; they used to go to stderr,
            # leaving the file empty for the plotting/annotation steps.
            print("\t".join(toks), file=fh)
        print(("wrote: %s, (regions with region-p "
               "< %.3f and n-probes >= %i: %i)")
              % (fh.name, region_filter_p, region_filter_n, N),
              file=sys.stderr)
    regions_t_bed = fh.name

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(regions_t_bed)

        manhattan.manhattan(prefix + ".slk.bed.gz", 3,
                            prefix.rstrip(".") + ".manhattan.png",
                            False, ['#959899', '#484B4C'], "", False, None,
                            regions=regions, bonferonni=False)
    except ImportError:
        pass  # they dont have matplotlib

    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(regions_t_bed, ("refGene", "cpgIslandExt"), out=fh,
                       feature_strand=True, parallel=len(spvals) > 500)
        print("wrote: %s annotated with %s" % (fh.name, db),
              file=sys.stderr)
def manhattan(fname, col_num, image_path, no_log, colors, title, lines, ymax,
              bonferonni=False, regions=None, subplots=False):
    """
    Draw a manhattan plot of the values in `fname` and save it.

    Records come from bediter(fname, col_num); their 'chrom', 'start' and
    'p' entries are used. Chromosomes are laid out one after another along
    the x axis in the order given by chr_cmp.

    fname, col_num  passed to bediter
    image_path      where the figure is saved; also the return value
    no_log          plot the values as they are instead of -log10(value)
    colors          colors cycled through, one per chromosome
    title           plot title (skipped when None)
    lines           draw vertical lines instead of scatter points
    ymax            upper y limit (left automatic when None)
    bonferonni      draw a horizontal line at -log10(0.05 / number of rows)
    regions         keyed by chromosome with [(start, stop), ...] extents
                    of the regions to highlight
    subplots        add inset QQ and histogram axes

    Written so that it runs on both Python 2.7 and Python 3.
    """
    from functools import cmp_to_key

    xs, ys, cs = [], [], []
    region_xys = []  # highlight certain regions.
    colors = cycle(colors)
    chrom_centers = []

    last_x = 0
    nrows = 0
    # groupby only merges adjacent records, so this presumably expects the
    # input to be grouped by chromosome already -- verify against callers.
    giter = [(seqid, list(rlist)) for seqid, rlist
             in groupby(bediter(fname, col_num), key=itemgetter('chrom'))]

    new_bounds = []
    rcolors = cycle(('#AE2117', '#EA352B'))
    # cmp_to_key replaces the Python-2-only `sorted(..., cmp=chr_cmp)`.
    for seqid, rlist in sorted(giter, key=cmp_to_key(chr_cmp)):
        color = next(colors)
        nrows += len(rlist)
        # since chroms are on the same plot. add this chrom to the end of the
        # last chrom
        rcolor = next(rcolors)
        region_xs = [last_x + r['start'] for r in rlist]
        xs.extend(region_xs)
        ys.extend([r['p'] for r in rlist])
        cs.extend([color] * len(rlist))

        if regions and seqid in regions:
            regions_bounds = regions[seqid]
            if len(regions_bounds) < 500:
                # the any() scan is per probe, hence the cap on regions.
                region_xys.extend(
                    [(last_x + r['start'], r['p'], rcolor) for r in rlist
                     if any((s - 1 <= r['start'] <= e + 1)
                            for s, e in regions_bounds)])
            else:
                sys.stderr.write("regions for %s > 500, not plotting\n"
                                 % seqid)
            # adjust the bounds of each region based on chrom.
            new_bounds.extend([(last_x + s, last_x + e)
                               for s, e in regions_bounds])

        # save the middle of the region to place the label
        chrom_centers.append((seqid, (region_xs[0] + region_xs[-1]) / 2))
        # keep track so that chrs don't overlap.
        last_x = xs[-1]

    xs = np.array(xs)
    ys = np.array(ys) if no_log else -np.log10(ys)

    plt.close()
    f, ax = plt.subplots(1, figsize=(10, 6))

    bonferonni_p = 0.05 / nrows

    if title is not None:
        plt.title(title)

    ax.set_ylabel('' if no_log else '-log10(p)')
    if regions:
        # Plot as colored background
        if len(new_bounds) < 32:
            for s, e in new_bounds:
                ax.axvspan(s - 2, e + 2, facecolor='#EA352B', ec='#EA352B',
                           alpha=0.3, zorder=0)
        # plot as points. Skipped when nothing was collected: unpacking
        # zip(*[]) would raise ValueError.
        if region_xys:
            rxs, rys, rcs = zip(*region_xys)
            if not no_log:
                rys = -np.log10(rys)
            ax.scatter(rxs, rys, s=6, c=rcs, edgecolors=rcs, zorder=2)

    if lines:
        ax.vlines(xs, 0, ys, colors=cs, alpha=0.5)
    else:
        # lighter markers without outlines once there are many points
        alpha = 0.8 if len(xs) < 10000 else 0.6
        edgecolors = 'k' if len(xs) < 10000 else 'none'
        ax.scatter(xs, ys, s=3.5, c=cs, edgecolors=edgecolors, alpha=alpha,
                   zorder=1)

    # plot 0.05 line after multiple testing. always nlog10'ed since
    # that's the space we're plotting in.
    if bonferonni:
        ax.axhline(y=-np.log10(bonferonni_p), color='0.5', linewidth=2)
    if max(xs) - min(xs) > 10000:
        plt.xlim(0, xs[-1])
    else:
        plt.xlim(xs[0], xs[-1])
    plt.ylim(ymin=0)
    if ymax is not None:
        plt.ylim(ymax=ymax)
    plt.xticks([c[1] for c in chrom_centers],
               [c[0].replace('chr', '') for c in chrom_centers],
               rotation=-90, size=8.5)
    sys.stderr.write("Bonferonni-corrected p-value for %i rows: %.3g\n"
                     % (nrows, 0.05 / nrows))
    sys.stderr.write("values less than Bonferonni-corrected p-value: %i \n"
                     % (ys > -np.log10(bonferonni_p)).sum())

    if subplots:
        pys = np.sort(10**-ys)  # convert back to actual p-values
        gc = genomic_control(pys)
        ax_qq = f.add_axes((0.74, 0.12, 0.22, 0.22), alpha=0.2)

        ax_qq.text(0.03, 0.88, r'$\lambda : %.3f$' % gc,
                   transform=ax_qq.transAxes)

        qqplot(ys, ax_qq)

        ax_hist = f.add_axes((0.12, 0.12, 0.22, 0.22), frameon=True,
                             alpha=0.6)
        hist(pys, ax_hist)
    sys.stderr.write("saving to: %s\n" % image_path)
    f.tight_layout()
    plt.savefig(image_path)

    return image_path
def pipeline(col_num, step, dist, acf_dist, prefix, threshold, seed,
             bed_files, mlog=True, region_filter_p=1, region_filter_n=None,
             genome_control=False, db=None, use_fdr=True):
    """
    Run every analysis step on `bed_files`, writing one output file per step.

    All outputs are named from `prefix` (trailing dots stripped) and each
    one is announced on stderr:

      <prefix>.args.txt          command-line arguments, date and version
      <prefix>.acf.txt           autocorrelation values (acf.write_acf)
      <prefix>.slk.bed.gz        rows produced by slk.adjust_pvals
      <prefix>.slk.gc.bed.gz     only when `genome_control` is true
      <prefix>.fdr.bed.gz        slk rows plus the value from fdr.fdr
      <prefix>.regions.bed.gz    output of peaks.peaks
      <prefix>.regions-p.bed.gz  per-region p-values from region_p.region_p
      <prefix>.regions-t.bed     rows from filter.filter that pass the filters
      <prefix>.manhattan.png     only when cpv.manhattan can be imported
      <prefix>.anno.<db>.bed     only when `db` is given

    Parameters:
      col_num          p-value column, forwarded to stepsize, acf, slk and
                       filter (as `p_col_name`)
      step             lag step size; when None it is
                       min(acf_dist, stepsize.stepsize(bed_files, col_num))
      dist             forwarded to peaks.peaks
      acf_dist         upper bound of the lags given to acf.acf
      threshold, seed  forwarded to peaks.peaks
      bed_files        input files; bed_files[0] is also given to filter
      mlog             forwarded to acf.acf
      region_filter_p  rows whose column 6 exceeds this are dropped
      region_filter_n  rows whose column 4 is below this are dropped
                       (None means 0)
      genome_control   also write the genome-control adjusted slk file and
                       use it as the input to the FDR step
      db               genome name given to cruzdb.Genome for annotation
      use_fdr          call peaks on the last column of the fdr file if
                       true, otherwise on the second-to-last column

    Returns None. Calls sys.exit() when no regions are found.
    Written so that it runs on both Python 2.7 and Python 3.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import datetime
    import operator

    def log(msg):
        # one status line on stderr, as `print >> sys.stderr, msg` did
        sys.stderr.write(msg + "\n")

    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        log("calculated stepsize as: %i" % step)

    # list() so that append works when range() is lazy (Python 3).
    lags = list(range(1, acf_dist, step))
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first lag whose
    # correlation drops below 0.04. That lag is itself kept only while
    # fewer than three lags have been collected.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2:
            break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals):
            break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        fh.write(" ".join(sys.argv[1:]) + "\n\n")
        fh.write("date: %s\n" % datetime.datetime.today())
        from .__init__ import __version__
        fh.write("version: %s\n" % (__version__,))

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        log("wrote: %s" % fh.name)
    with open(prefix + ".acf.txt") as acf_fh:
        log("ACF:\n" + acf_fh.read())

    spvals, opvals = [], []
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    log("# original lambda: %.2f" % genomic_control(opvals))
    del opvals

    gc_lambda = genomic_control(spvals)
    log("wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda))

    if genome_control:
        adj = genome_control_adjust(
            [d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
        # NOTE(review): adj has one entry per record yielded by bediter,
        # while this loop enumerates every line of the file, including the
        # '#chrom' header written above. If bediter skips that header the
        # two are offset by one -- confirm.
        with ts.nopen(prefix + ".slk.gc.bed.gz", "w") as fhslk:
            for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
                fhslk.write("%s\t%.5g\n" % (line.rstrip("\r\n"), adj[i]))
        log("wrote: %s" % fhslk.name)

    # fhslk.name is the genome-control file when one was written above.
    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        log("wrote: %s" % fh.name)

    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        # list() drains peaks.peaks, which is handed the output file `fh`.
        list(peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2,
                         threshold, seed, dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    log("wrote: %s (%i regions)" % (fregions, n_regions))
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                prefix + ".slk.bed.gz", prefix + ".regions.bed.gz", -2,
                step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        log("wrote: %s, (regions with corrected-p < 0.05: %i)"
            % (fh.name, N))

    regions_bed = fh.name
    if region_filter_n is None:
        region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(filter.filter(bed_files[0], regions_bed,
                                               p_col_name=col_num)):
            if i == 0:
                # first row is treated as the header: mark it as a comment
                toks[0] = "#" + toks[0]
            else:
                # column 6 is compared as the region p-value and column 4
                # as the probe count (see the log message below).
                if float(toks[6]) > region_filter_p:
                    continue
                if int(toks[4]) < region_filter_n:
                    continue
                N += 1
            fh.write("\t".join(toks) + "\n")
        log(("wrote: %s, (regions with region-p "
             "< %.3f and n-probes >= %i: %i)")
            % (fh.name, region_filter_p, region_filter_n, N))
    regions_t_bed = fh.name

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(regions_t_bed)

        manhattan.manhattan(prefix + ".slk.bed.gz", 3,
                            prefix.rstrip(".") + ".manhattan.png",
                            False, ['#959899', '#484B4C'], "", False, None,
                            regions=regions, bonferonni=False)
    except ImportError:
        pass  # they dont have matplotlib

    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(regions_t_bed, ("refGene", "cpgIslandExt"), out=fh,
                       feature_strand=True, parallel=len(spvals) > 500)
        log("wrote: %s annotated with %s" % (fh.name, db))
def manhattan(fname, col_num, image_path, no_log, colors, title, lines, ymax,
              bonferonni=False, regions=None, subplots=False):
    """
    Draw a manhattan plot of the values in `fname` and save it.

    Records come from bediter(fname, col_num); their 'chrom', 'start' and
    'p' entries are used. Chromosomes are laid out one after another along
    the x axis in the order given by chr_cmp.

    fname, col_num  passed to bediter
    image_path      where the figure is saved; also the return value
    no_log          plot the values as they are instead of -log10(value)
    colors          colors cycled through, one per chromosome
    title           plot title (skipped when None)
    lines           draw vertical lines instead of scatter points
    ymax            upper y limit (left automatic when None)
    bonferonni      draw a horizontal line at -log10(0.05 / number of rows)
    regions         keyed by chromosome with [(start, stop), ...] extents
                    of the regions to highlight
    subplots        add inset QQ and histogram axes

    Written so that it runs on both Python 2.7 and Python 3.
    """
    from functools import cmp_to_key

    xs, ys, cs = [], [], []
    region_xys = []  # highlight certain regions.
    colors = cycle(colors)
    chrom_centers = []

    last_x = 0
    nrows = 0
    # groupby only merges adjacent records, so this presumably expects the
    # input to be grouped by chromosome already -- verify against callers.
    giter = [(seqid, list(rlist)) for seqid, rlist
             in groupby(bediter(fname, col_num), key=itemgetter('chrom'))]

    new_bounds = []
    rcolors = cycle(('#AE2117', '#EA352B'))
    # cmp_to_key replaces the Python-2-only `sorted(..., cmp=chr_cmp)`.
    for seqid, rlist in sorted(giter, key=cmp_to_key(chr_cmp)):
        color = next(colors)
        nrows += len(rlist)
        # since chroms are on the same plot. add this chrom to the end of the
        # last chrom
        rcolor = next(rcolors)
        region_xs = [last_x + r['start'] for r in rlist]
        xs.extend(region_xs)
        ys.extend([r['p'] for r in rlist])
        cs.extend([color] * len(rlist))

        if regions and seqid in regions:
            regions_bounds = regions[seqid]
            if len(regions_bounds) < 500:
                # the any() scan is per probe, hence the cap on regions.
                region_xys.extend(
                    [(last_x + r['start'], r['p'], rcolor) for r in rlist
                     if any((s - 1 <= r['start'] <= e + 1)
                            for s, e in regions_bounds)])
            else:
                sys.stderr.write("regions for %s > 500, not plotting\n"
                                 % seqid)
            # adjust the bounds of each region based on chrom.
            new_bounds.extend([(last_x + s, last_x + e)
                               for s, e in regions_bounds])

        # save the middle of the region to place the label
        chrom_centers.append((seqid, (region_xs[0] + region_xs[-1]) / 2))
        # keep track so that chrs don't overlap.
        last_x = xs[-1]

    xs = np.array(xs)
    ys = np.array(ys) if no_log else -np.log10(ys)

    plt.close()
    f, ax = plt.subplots(1, figsize=(10, 6))

    bonferonni_p = 0.05 / nrows

    if title is not None:
        plt.title(title)

    ax.set_ylabel('' if no_log else '-log10(p)')
    if regions:
        # Plot as colored background
        if len(new_bounds) < 32:
            for s, e in new_bounds:
                ax.axvspan(s - 2, e + 2, facecolor='#EA352B', ec='#EA352B',
                           alpha=0.3, zorder=0)
        # plot as points. Skipped when nothing was collected: unpacking
        # zip(*[]) would raise ValueError.
        if region_xys:
            rxs, rys, rcs = zip(*region_xys)
            if not no_log:
                rys = -np.log10(rys)
            ax.scatter(rxs, rys, s=6, c=rcs, edgecolors=rcs, zorder=2)

    if lines:
        ax.vlines(xs, 0, ys, colors=cs, alpha=0.5)
    else:
        # lighter markers without outlines once there are many points
        alpha = 0.8 if len(xs) < 10000 else 0.6
        edgecolors = 'k' if len(xs) < 10000 else 'none'
        ax.scatter(xs, ys, s=3.5, c=cs, edgecolors=edgecolors, alpha=alpha,
                   zorder=1)

    # plot 0.05 line after multiple testing. always nlog10'ed since
    # that's the space we're plotting in.
    if bonferonni:
        ax.axhline(y=-np.log10(bonferonni_p), color='0.5', linewidth=2)
    if max(xs) - min(xs) > 10000:
        plt.xlim(0, xs[-1])
    else:
        plt.xlim(xs[0], xs[-1])
    plt.ylim(ymin=0)
    if ymax is not None:
        plt.ylim(ymax=ymax)
    plt.xticks([c[1] for c in chrom_centers],
               [c[0].replace('chr', '') for c in chrom_centers],
               rotation=-90, size=8.5)
    sys.stderr.write("Bonferonni-corrected p-value for %i rows: %.3g\n"
                     % (nrows, 0.05 / nrows))
    sys.stderr.write("values less than Bonferonni-corrected p-value: %i \n"
                     % (ys > -np.log10(bonferonni_p)).sum())

    if subplots:
        pys = np.sort(10**-ys)  # convert back to actual p-values
        gc = genomic_control(pys)
        ax_qq = f.add_axes((0.74, 0.12, 0.22, 0.22), alpha=0.2)

        ax_qq.text(0.03, 0.88, r'$\lambda : %.3f$' % gc,
                   transform=ax_qq.transAxes)

        qqplot(ys, ax_qq)

        ax_hist = f.add_axes((0.12, 0.12, 0.22, 0.22), frameon=True,
                             alpha=0.6)
        hist(pys, ax_hist)
    sys.stderr.write("saving to: %s\n" % image_path)
    f.tight_layout()
    plt.savefig(image_path)

    return image_path
def pipeline(col_num, step, dist, prefix, threshold, seed, bed_files,
             mlog=False, region_filter_p=1, region_filter_n=1,
             genome_control=False, db=None):
    """
    Run every analysis step on `bed_files`, writing one output file per step.

    All outputs are named from `prefix` (trailing dots stripped) and each
    one is announced on stderr:

      <prefix>.args.txt       command-line arguments and date
      <prefix>.acf.txt        autocorrelation values (acf.write_acf)
      <prefix>.slk.bed        rows produced by slk.adjust_pvals
      <prefix>.slk.gc.bed     only when `genome_control` is true
      <prefix>.fdr.bed        slk rows plus the value from fdr.fdr
      <prefix>.regions.bed    output of peaks.peaks
      <prefix>.regions-p.bed  per-region p-values from region_p.region_p
      <prefix>.regions-t.bed  only when the header of bed_files[0] has
                              't', 'start' and 'end' columns
      <prefix>.manhattan.png  only when cpv.manhattan can be imported
      <prefix>.anno.<db>.bed  only when `db` is given

    Parameters:
      col_num          p-value column, forwarded to stepsize, acf, slk and
                       filter (as `p_col_name`)
      step             lag step size, also given to peaks.peaks and
                       region_p; when None it is
                       stepsize.stepsize(bed_files, col_num)
      dist             upper bound of the lags given to acf.acf
      threshold, seed  forwarded to peaks.peaks
      bed_files        input files; bed_files[0] supplies the header and is
                       also given to filter
      mlog             forwarded to acf.acf and region_p.region_p
      region_filter_p  rows whose column 6 exceeds this are dropped
      region_filter_n  rows whose column 4 is below this are dropped
      genome_control   also write the genome-control adjusted slk file and
                       use it as the input to the FDR step
      db               genome name given to cruzdb.Genome for annotation

    Returns None.
    Written so that it runs on both Python 2.7 and Python 3.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import datetime
    import operator

    def log(msg):
        # one status line on stderr, as `print >>sys.stderr, msg` did
        sys.stderr.write(msg + "\n")

    if step is None:
        step = stepsize.stepsize(bed_files, col_num)
        log("calculated stepsize as: %i" % step)

    # list() so that append works when range() is lazy (Python 3).
    lags = list(range(1, dist, step))
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first lag whose
    # correlation drops below 0.04. That lag is itself kept only while
    # fewer than three lags have been collected.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2:
            break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals):
            break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        fh.write(" ".join(sys.argv[1:]) + "\n\n")
        fh.write("date: %s\n" % datetime.datetime.today())

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        log("wrote: %s" % fh.name)
    with open(prefix + ".acf.txt") as acf_fh:
        log("ACF:\n" + acf_fh.read())

    spvals, opvals = [], []
    with open(prefix + ".slk.bed", "w") as fhslk:
        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    log("# original lambda: %.2f" % genomic_control(opvals))
    del opvals

    gc_lambda = genomic_control(spvals)
    log("wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda))

    if genome_control:
        adj = genome_control_adjust(
            [d['p'] for d in bediter(prefix + ".slk.bed", -1)])
        with open(prefix + ".slk.gc.bed", "w") as fhslk:
            with open(prefix + ".slk.bed") as slk_in:
                for i, line in enumerate(slk_in):
                    fhslk.write("%s\t%.5g\n"
                                % (line.rstrip("\r\n"), adj[i]))
        log("wrote: %s" % fhslk.name)

    # fhslk.name is the genome-control file when one was written above.
    with open(prefix + ".fdr.bed", "w") as fh:
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        log("wrote: %s" % fh.name)

    fregions = prefix + ".regions.bed"
    with open(fregions, "w") as fh:
        # list() drains peaks.peaks, which is handed the output file `fh`.
        list(peaks.peaks(prefix + ".fdr.bed", -1, threshold, seed,
                         step, fh, operator.le))
    with open(fregions) as regions_in:
        n_regions = sum(1 for _ in regions_in)
    log("wrote: %s (%i regions)" % (fregions, n_regions))

    with open(prefix + ".regions-p.bed", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tslk_p\tslk_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                prefix + ".slk.bed", prefix + ".regions.bed", -2,
                0, step, mlog=mlog):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        log("wrote: %s, (regions with corrected-p < 0.05: %i)"
            % (fh.name, N))

    regions_bed = fh.name

    # Read only the header line of the first input file.
    opener = gzip.open if bed_files[0].endswith(".gz") else open
    with opener(bed_files[0]) as header_fh:
        first_line = header_fh.readline()
    if not isinstance(first_line, str):
        # gzip.open yields bytes by default on Python 3
        first_line = first_line.decode("utf-8")
    # strip the newline so the last column name can match as well
    header = first_line.rstrip("\r\n").split("\t")

    # `fh` deliberately keeps pointing at the regions-p file when the
    # regions-t file is not written; the later steps use fh.name.
    if all(h in header for h in ('t', 'start', 'end')):
        with open(prefix + ".regions-t.bed", "w") as fh:
            N = 0
            for i, toks in enumerate(filter.filter(bed_files[0],
                                                   regions_bed,
                                                   p_col_name=col_num)):
                if i == 0:
                    # first row is treated as the header: mark it as a
                    # comment
                    toks[0] = "#" + toks[0]
                else:
                    # column 6 is compared as the region p-value and
                    # column 4 as the probe count (see the log message).
                    if float(toks[6]) > region_filter_p:
                        continue
                    if int(toks[4]) < region_filter_n:
                        continue
                    N += 1
                fh.write("\t".join(toks) + "\n")
            log(("wrote: %s, (regions with region-p "
                 "< %.3f and n-probes >= %i: %i)")
                % (fh.name, region_filter_p, region_filter_n, N))

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed", 3,
                            prefix.rstrip(".") + ".manhattan.png",
                            False, ['#959899', '#484B4C'], "", False, None,
                            regions=regions, bonferonni=True)
    except ImportError:
        pass  # they dont have matplotlib

    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            g.annotate(lastf, ("refGene", "cpgIslandExt", "cytoBand"),
                       out=fh, feature_strand=True,
                       parallel=len(spvals) > 500)
        log("wrote: %s annotated with %s" % (fh.name, db))