Example #1
0
def pipeline(col_num, step, dist, acf_dist, prefix, threshold, seed,
        bed_files, mlog=True, region_filter_p=1, region_filter_n=None,
        genome_control=False, db=None, use_fdr=True):
    """
    Run every analysis step on `bed_files`, writing each intermediate
    result to a file whose name starts with `prefix`.

    Files written, in order (later steps read the earlier files):

        <prefix>.args.txt          command-line args, date, version
        <prefix>.acf.txt           output of acf.write_acf
        <prefix>.slk.bed.gz        rows from slk.adjust_pvals
        <prefix>.slk.gc.bed.gz     only when `genome_control` is true
        <prefix>.fdr.bed.gz        rows from fdr.fdr
        <prefix>.regions.bed.gz    output of peaks.peaks
        <prefix>.regions-p.bed.gz  rows from region_p.region_p
        <prefix>.regions-t.bed     rows from filter.filter that pass the
                                   region filters
        <prefix>.manhattan.png     skipped if the plotting import fails
        <prefix>.anno.<db>.bed     only when `db` is given

    Parameters:
        col_num: column selector handed to stepsize, acf, slk and filter.
        step: lag step; when None it is min(acf_dist, computed stepsize).
        dist: passed through to peaks.peaks.
        acf_dist: upper bound of the lags used for the ACF.
        prefix: output prefix; trailing "." characters are removed.
        threshold, seed: passed through to peaks.peaks.
        bed_files: input files; the first one is also used by filter.
        mlog: passed through to acf.acf.
        region_filter_p: rows whose column 6 exceeds this are dropped
            from regions-t.bed (the log message calls it region-p).
        region_filter_n: rows whose column 4 is below this are dropped
            (the log message calls it n-probes); None means 0.
        genome_control: also write the .slk.gc.bed.gz file and run the
            FDR step on it instead of on .slk.bed.gz.
        db: cruzdb genome name used for annotation, or None to skip.
        use_fdr: peaks.peaks uses the last column of the fdr file when
            true, the second to last otherwise.

    Calls sys.exit() when no regions were found.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import operator
    import datetime

    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        print("calculated stepsize as: %i" % step, file=sys.stderr)

    lags = list(range(1, acf_dist, step))
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first
    # autocorrelation below 0.04: that lag is dropped when more than 2
    # lags are already kept, otherwise it is kept as the last one.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2: break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals): break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print(" ".join(sys.argv[1:]) + "\n", file=fh)
        print("date: %s" % datetime.datetime.today(), file=fh)
        from .__init__ import __version__
        print("version:", __version__, file=fh)

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        # status goes to stderr; writing it to fh would corrupt the ACF file
        print("wrote: %s" % fh.name, file=sys.stderr)
    # echo the ACF table; the with-block closes the read handle
    with open(prefix + ".acf.txt") as acf_fh:
        print("ACF:\n", acf_fh.read(), file=sys.stderr)

    spvals, opvals = array.array('f'), array.array('f')
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for chrom, results in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fmt = chrom + "\t%i\t%i\t%.4g\t%.4g\n"
            for row in results:
                row = tuple(row)
                fhslk.write(fmt % row)
                opvals.append(row[-2])
                spvals.append(row[-1])

    print("# original lambda: %.2f" % genomic_control(opvals), file=sys.stderr)
    del opvals

    gc_lambda = genomic_control(spvals)
    print("wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda),
            file=sys.stderr)

    if genome_control:
        # rebinding fhslk makes the FDR step below read the adjusted file
        with ts.nopen(prefix + ".slk.gc.bed.gz", "w") as fhslk:
            adj = genome_control_adjust([d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
            # NOTE(review): this enumeration includes the '#chrom' header
            # line written above; confirm bediter yields an entry for it
            # too, otherwise adj[i] is shifted by one line.
            for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
                print("%s\t%.5g" % (line.rstrip("\r\n"), adj[i]), file=fhslk)
        print("wrote: %s" % fhslk.name, file=sys.stderr)

    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print("wrote: %s" % fh.name, file=sys.stderr)
    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        # peaks.peaks is consumed only for its writes to fh
        list(peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2, threshold, seed,
            dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    print("wrote: %s (%i regions)" % (fregions, n_regions), file=sys.stderr)
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                               prefix + ".slk.bed.gz",
                               prefix + ".regions.bed.gz", -2,
                               step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print("wrote: %s, (regions with corrected-p < 0.05: %i)" \
                % (fh.name, N), file=sys.stderr)

    regions_bed = fh.name
    if region_filter_n is None: region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(filter.filter(bed_files[0],
            regions_bed, p_col_name=col_num)):
            # the first row is the header: mark it as a comment line
            if i == 0: toks[0] = "#" + toks[0]
            else:
                if float(toks[6]) > region_filter_p: continue
                if int(toks[4]) < region_filter_n: continue
                N += 1
            # rows belong in the output file; the manhattan and annotation
            # steps below read it back through fh.name
            print("\t".join(toks), file=fh)
        print(("wrote: %s, (regions with region-p "
                            "< %.3f and n-probes >= %i: %i)") \
                % (fh.name, region_filter_p, region_filter_n, N),
                file=sys.stderr)

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed.gz", 3, prefix.rstrip(".") + ".manhattan.png",
                         False, ['#959899', '#484B4C'], "", False, None,
                         regions=regions, bonferonni=False)
    except ImportError:
        pass # they dont have matplotlib


    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(lastf, ("refGene", "cpgIslandExt"), out=fh,
                    feature_strand=True, parallel=len(spvals) > 500)
        print("wrote: %s annotated with %s" % (fh.name, db), file=sys.stderr)
def manhattan(fname, col_num, image_path, no_log, colors, title, lines, ymax,
             bonferonni=False, regions=None, subplots=False):
    """
    Draw a manhattan plot of the rows of `fname` and save it to
    `image_path`.

    fname, col_num: handed to `bediter`; each row it yields is read
        through its 'chrom', 'start' and 'p' keys.
    image_path: where the figure is saved; also the return value.
    no_log: plot the values as they are instead of -log10(value).
    colors: colors cycled through, one per chromosome.
    title: plot title; skipped when None.
    lines: draw vertical lines from 0 instead of scatter points.
    ymax: upper y limit, or None to leave it alone.
    bonferonni: draw a horizontal line at -log10(0.05 / number of rows).
    regions: is keyed by chromosome with [(start, stop), ...] extents of
        the regions to highlight.
    subplots: add a QQ-plot inset and a histogram inset.
    """
    xs, ys, cs = [], [], []
    region_xys = [] # highlight certain regions.
    colors = cycle(colors)
    chrom_centers = []

    last_x = 0
    nrows = 0
    # materialise each group: groupby's sub-iterators are invalidated as
    # soon as the outer iterator advances.
    giter = [(seqid, list(rlist)) for seqid, rlist \
        in groupby(bediter(fname, col_num), key=itemgetter('chrom'))]

    new_bounds = []
    rcolors = cycle(('#AE2117', '#EA352B'))
    for seqid, rlist in sorted(giter, cmp=chr_cmp):
        color = colors.next()
        nrows += len(rlist)
        # since chroms are on the same plot. add this chrom to the end of the
        # last chrom
        rcolor = rcolors.next()

        region_xs = [last_x + r['start'] for r in rlist]
        xs.extend(region_xs)
        ys.extend([r['p'] for r in rlist])
        cs.extend([color] * len(rlist))

        if regions and seqid in regions:
            regions_bounds = regions[seqid]
            if len(regions_bounds) < 500:
                region_xys.extend([(last_x + r['start'], r['p'], rcolor) for r in rlist \
                  if any((s - 1 <= r['start'] <= e + 1) for s, e in regions_bounds)])
            else:
                sys.stderr.write("regions for %s > 500, not plotting\n" % seqid)
            # adjust the bounds of each region based on chrom.
            new_bounds.extend([(last_x + s, last_x + e)
                            for s, e in regions_bounds])

        # save the middle of the region to place the label
        chrom_centers.append((seqid, (region_xs[0] + region_xs[-1]) / 2))
        # keep track so that chrs don't overlap.
        last_x = xs[-1]

    xs = np.array(xs)
    ys = np.array(ys) if no_log else -np.log10(ys)

    plt.close()
    f, ax = plt.subplots(1, figsize=(10, 6))

    bonferonni_p = 0.05 / nrows

    if title is not None:
        plt.title(title)

    ax.set_ylabel('' if no_log else '-log10(p)')
    if regions:
        # Plot as colored background
        if len(new_bounds) < 32:
            for s, e in new_bounds:
                ax.axvspan(s - 2, e + 2, facecolor='#EA352B',
                           ec='#EA352B', alpha=0.3, zorder=0)
        # plot as points. region_xys stays empty when no row fell inside
        # a region (or every chromosome had >= 500 regions); zip(*[])
        # then yields nothing and the 3-way unpacking would raise
        # ValueError, so only draw when there is something to draw.
        if region_xys:
            rxs, rys, rcs = zip(*region_xys)
            if not no_log: rys = -np.log10(rys)
            ax.scatter(rxs, rys,
                    #  s=rys ** 1.3,  # size by -log10(p)
                    s = 6,
                    c=rcs, edgecolors=rcs,
                    zorder=2)

    if lines:
        ax.vlines(xs, 0, ys, colors=cs, alpha=0.5)
    else:
        # lighter, edge-less markers once the plot gets crowded
        alpha = 0.8 if len(xs) < 10000 else 0.6
        edgecolors = 'k' if len(xs) < 10000 else 'none'
        ax.scatter(xs, ys, s=3.5, c=cs, edgecolors=edgecolors, alpha=alpha, zorder=1)


    # plot 0.05 line after multiple testing. always nlog10'ed since
    # that's the space we're plotting in.
    if bonferonni:
        ax.axhline(y=-np.log10(bonferonni_p), color='0.5', linewidth=2)
    #plt.axis('tight')
    if max(xs) - min(xs) > 10000:
        plt.xlim(0, xs[-1])
    else:
        plt.xlim(xs[0], xs[-1])
    plt.ylim(ymin=0)
    if ymax is not None: plt.ylim(ymax=ymax)
    plt.xticks([c[1] for c in chrom_centers],
               [c[0].replace('chr', '') for c in chrom_centers], rotation=-90, size=8.5)
    #plt.show()
    print >>sys.stderr, "Bonferonni-corrected p-value for %i rows: %.3g" \
            % (nrows, 0.05 / nrows)
    # NOTE(review): this comparison treats ys as -log10 values, which
    # only holds when no_log is false -- confirm the intent for no_log.
    print >>sys.stderr, "values less than Bonferonni-corrected p-value: %i " \
            % (ys > -np.log10(bonferonni_p)).sum()

    if subplots:
        pys = np.sort(10**-ys) # convert back to actual p-values
        gc = genomic_control(pys)
        ax_qq = f.add_axes((0.74, 0.12, 0.22, 0.22), alpha=0.2)
        ax_qq.text(0.03, 0.88, r'$\lambda : %.3f$' % gc, transform=ax_qq.transAxes)
        qqplot(ys, ax_qq)

        ax_hist = f.add_axes((0.12, 0.12, 0.22, 0.22), frameon=True, alpha=0.6)
        hist(pys, ax_hist)

    print >>sys.stderr, "saving to: %s" % image_path
    f.tight_layout()
    plt.savefig(image_path)

    return image_path
Example #3
0
def pipeline(col_num,
             step,
             dist,
             acf_dist,
             prefix,
             threshold,
             seed,
             bed_files,
             mlog=True,
             region_filter_p=1,
             region_filter_n=None,
             genome_control=False,
             db=None,
             use_fdr=True):
    """
    Run every analysis step on `bed_files`, writing each intermediate
    result to a file whose name starts with `prefix`.

    Files written, in order (later steps read the earlier files):
    <prefix>.args.txt, .acf.txt, .slk.bed.gz, .slk.gc.bed.gz (only when
    `genome_control` is true), .fdr.bed.gz, .regions.bed.gz,
    .regions-p.bed.gz, .regions-t.bed, .manhattan.png (skipped when the
    plotting import fails) and .anno.<db>.bed (only when `db` is given).

    col_num: column selector handed to stepsize, acf, slk and filter.
    step: lag step; when None it is min(acf_dist, computed stepsize).
    dist, threshold, seed: passed through to peaks.peaks.
    acf_dist: upper bound of the lags used for the ACF.
    prefix: output prefix; trailing "." characters are removed.
    bed_files: input files; the first one is also used by filter.
    mlog: passed through to acf.acf.
    region_filter_p: rows whose column 6 exceeds this are dropped from
        regions-t.bed (the log message calls it region-p).
    region_filter_n: rows whose column 4 is below this are dropped (the
        log message calls it n-probes); None means 0.
    genome_control: also write .slk.gc.bed.gz and run FDR on that file.
    db: cruzdb genome name used for annotation, or None to skip.
    use_fdr: peaks.peaks uses the last column of the fdr file when true,
        the second to last otherwise.

    Calls sys.exit() when no regions were found.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import operator

    if step is None:
        step = min(acf_dist, stepsize.stepsize(bed_files, col_num))
        print >> sys.stderr, "calculated stepsize as: %i" % step

    lags = range(1, acf_dist, step)
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files,
                                lags,
                                col_num,
                                simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first
    # autocorrelation below 0.04: that lag is dropped when more than 2
    # lags are already kept, otherwise it is kept as the last one.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2: break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals): break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print >> fh, " ".join(sys.argv[1:]) + "\n"
        import datetime
        print >> fh, "date: %s" % datetime.datetime.today()
        from .__init__ import __version__
        print >> fh, "version:", __version__

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        print >> sys.stderr, "wrote: %s" % fh.name
    # echo the ACF table; the with-block closes the read handle
    with open(prefix + ".acf.txt") as acf_fh:
        print >> sys.stderr, "ACF:\n", acf_fh.read()

    spvals, opvals = [], []
    with ts.nopen(prefix + ".slk.bed.gz", "w") as fhslk:
        fhslk.write('#chrom\tstart\tend\tp\tregion-p\n')
        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    print >> sys.stderr, "# original lambda: %.2f" % genomic_control(opvals)
    del opvals

    gc_lambda = genomic_control(spvals)
    print >> sys.stderr, "wrote: %s with lambda: %.2f" % (fhslk.name,
                                                          gc_lambda)

    if genome_control:
        # rebinding fhslk makes the FDR step below read the adjusted file;
        # the with-block closes it even if the adjustment raises.
        with ts.nopen(prefix + ".slk.gc.bed.gz", "w") as fhslk:
            adj = genome_control_adjust(
                [d['p'] for d in bediter(prefix + ".slk.bed.gz", -1)])
            # NOTE(review): this enumeration includes the '#chrom' header
            # line written above; confirm bediter yields an entry for it
            # too, otherwise adj[i] is shifted by one line.
            for i, line in enumerate(ts.nopen(prefix + ".slk.bed.gz")):
                print >> fhslk, "%s\t%.5g" % (line.rstrip("\r\n"), adj[i])
        print >> sys.stderr, "wrote: %s" % fhslk.name

    with ts.nopen(prefix + ".fdr.bed.gz", "w") as fh:
        fh.write('#chrom\tstart\tend\tp\tregion-p\tregion-q\n')
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print >> sys.stderr, "wrote: %s" % fh.name
    fregions = prefix + ".regions.bed.gz"
    with ts.nopen(fregions, "w") as fh:
        # peaks.peaks is consumed only for its writes to fh
        list(
            peaks.peaks(prefix + ".fdr.bed.gz", -1 if use_fdr else -2,
                        threshold, seed, dist, fh, operator.le))
    n_regions = sum(1 for _ in ts.nopen(fregions))
    print >> sys.stderr, "wrote: %s (%i regions)" % (fregions, n_regions)
    if n_regions == 0:
        sys.exit()

    with ts.nopen(prefix + ".regions-p.bed.gz", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tz_p\tz_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                prefix + ".slk.bed.gz", prefix + ".regions.bed.gz", -2, step):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print >>sys.stderr, "wrote: %s, (regions with corrected-p < 0.05: %i)" \
                % (fh.name, N)

    regions_bed = fh.name
    if region_filter_n is None: region_filter_n = 0
    with ts.nopen(prefix + ".regions-t.bed", "w") as fh:
        N = 0
        for i, toks in enumerate(
                filter.filter(bed_files[0], regions_bed, p_col_name=col_num)):
            # the first row is the header: mark it as a comment line
            if i == 0: toks[0] = "#" + toks[0]
            else:
                if float(toks[6]) > region_filter_p: continue
                if int(toks[4]) < region_filter_n: continue
                N += 1
            print >> fh, "\t".join(toks)
        print >>sys.stderr, ("wrote: %s, (regions with region-p "
                            "< %.3f and n-probes >= %i: %i)") \
                % (fh.name, region_filter_p, region_filter_n, N)

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed.gz",
                            3,
                            prefix.rstrip(".") + ".manhattan.png",
                            False, ['#959899', '#484B4C'],
                            "",
                            False,
                            None,
                            regions=regions,
                            bonferonni=False)
    except ImportError:
        pass  # they dont have matplotlib

    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            fh.write('#')
            g.annotate(lastf, ("refGene", "cpgIslandExt"),
                       out=fh,
                       feature_strand=True,
                       parallel=len(spvals) > 500)
        print >> sys.stderr, "wrote: %s annotated with %s" % (fh.name, db)
Example #4
0
def manhattan(fname,
              col_num,
              image_path,
              no_log,
              colors,
              title,
              lines,
              ymax,
              bonferonni=False,
              regions=None,
              subplots=False):
    """
    Draw a manhattan plot of the rows of `fname` and save it to
    `image_path`.

    fname, col_num: handed to `bediter`; each row it yields is read
        through its 'chrom', 'start' and 'p' keys.
    image_path: where the figure is saved; also the return value.
    no_log: plot the values as they are instead of -log10(value).
    colors: colors cycled through, one per chromosome.
    title: plot title; skipped when None.
    lines: draw vertical lines from 0 instead of scatter points.
    ymax: upper y limit, or None to leave it alone.
    bonferonni: draw a horizontal line at -log10(0.05 / number of rows).
    regions: is keyed by chromosome with [(start, stop), ...] extents of
        the regions to highlight.
    subplots: add a QQ-plot inset and a histogram inset.
    """
    xs, ys, cs = [], [], []
    region_xys = []  # highlight certain regions.
    colors = cycle(colors)
    chrom_centers = []

    last_x = 0
    nrows = 0
    # materialise each group: groupby's sub-iterators are invalidated as
    # soon as the outer iterator advances.
    giter = [(seqid, list(rlist)) for seqid, rlist \
        in groupby(bediter(fname, col_num), key=itemgetter('chrom'))]

    new_bounds = []
    rcolors = cycle(('#AE2117', '#EA352B'))
    for seqid, rlist in sorted(giter, cmp=chr_cmp):
        color = colors.next()
        nrows += len(rlist)
        # since chroms are on the same plot. add this chrom to the end of the
        # last chrom
        rcolor = rcolors.next()

        region_xs = [last_x + r['start'] for r in rlist]
        xs.extend(region_xs)
        ys.extend([r['p'] for r in rlist])
        cs.extend([color] * len(rlist))

        if regions and seqid in regions:
            regions_bounds = regions[seqid]
            if len(regions_bounds) < 500:
                region_xys.extend([(last_x + r['start'], r['p'], rcolor) for r in rlist \
                  if any((s - 1 <= r['start'] <= e + 1) for s, e in regions_bounds)])
            else:
                sys.stderr.write("regions for %s > 500, not plotting\n" %
                                 seqid)
            # adjust the bounds of each region based on chrom.
            new_bounds.extend([(last_x + s, last_x + e)
                               for s, e in regions_bounds])

        # save the middle of the region to place the label
        chrom_centers.append((seqid, (region_xs[0] + region_xs[-1]) / 2))
        # keep track so that chrs don't overlap.
        last_x = xs[-1]

    xs = np.array(xs)
    ys = np.array(ys) if no_log else -np.log10(ys)

    plt.close()
    f, ax = plt.subplots(1, figsize=(10, 6))

    bonferonni_p = 0.05 / nrows

    if title is not None:
        plt.title(title)

    ax.set_ylabel('' if no_log else '-log10(p)')
    if regions:
        # Plot as colored background
        if len(new_bounds) < 32:
            for s, e in new_bounds:
                ax.axvspan(s - 2,
                           e + 2,
                           facecolor='#EA352B',
                           ec='#EA352B',
                           alpha=0.3,
                           zorder=0)
        # plot as points. region_xys stays empty when no row fell inside
        # a region (or every chromosome had >= 500 regions); zip(*[])
        # then yields nothing and the 3-way unpacking would raise
        # ValueError, so only draw when there is something to draw.
        if region_xys:
            rxs, rys, rcs = zip(*region_xys)
            if not no_log: rys = -np.log10(rys)
            ax.scatter(
                rxs,
                rys,
                #  s=rys ** 1.3,  # size by -log10(p)
                s=6,
                c=rcs,
                edgecolors=rcs,
                zorder=2)

    if lines:
        ax.vlines(xs, 0, ys, colors=cs, alpha=0.5)
    else:
        # lighter, edge-less markers once the plot gets crowded
        alpha = 0.8 if len(xs) < 10000 else 0.6
        edgecolors = 'k' if len(xs) < 10000 else 'none'
        ax.scatter(xs,
                   ys,
                   s=3.5,
                   c=cs,
                   edgecolors=edgecolors,
                   alpha=alpha,
                   zorder=1)

    # plot 0.05 line after multiple testing. always nlog10'ed since
    # that's the space we're plotting in.
    if bonferonni:
        ax.axhline(y=-np.log10(bonferonni_p), color='0.5', linewidth=2)
    #plt.axis('tight')
    if max(xs) - min(xs) > 10000:
        plt.xlim(0, xs[-1])
    else:
        plt.xlim(xs[0], xs[-1])
    plt.ylim(ymin=0)
    if ymax is not None: plt.ylim(ymax=ymax)
    plt.xticks([c[1] for c in chrom_centers],
               [c[0].replace('chr', '') for c in chrom_centers],
               rotation=-90,
               size=8.5)
    #plt.show()
    print >>sys.stderr, "Bonferonni-corrected p-value for %i rows: %.3g" \
            % (nrows, 0.05 / nrows)
    # NOTE(review): this comparison treats ys as -log10 values, which
    # only holds when no_log is false -- confirm the intent for no_log.
    print >>sys.stderr, "values less than Bonferonni-corrected p-value: %i " \
            % (ys > -np.log10(bonferonni_p)).sum()

    if subplots:
        pys = np.sort(10**-ys)  # convert back to actual p-values
        gc = genomic_control(pys)
        ax_qq = f.add_axes((0.74, 0.12, 0.22, 0.22), alpha=0.2)
        ax_qq.text(0.03,
                   0.88,
                   r'$\lambda : %.3f$' % gc,
                   transform=ax_qq.transAxes)
        qqplot(ys, ax_qq)

        ax_hist = f.add_axes((0.12, 0.12, 0.22, 0.22), frameon=True, alpha=0.6)
        hist(pys, ax_hist)

    print >> sys.stderr, "saving to: %s" % image_path
    f.tight_layout()
    plt.savefig(image_path)

    return image_path
Example #5
0
def pipeline(col_num, step, dist, prefix, threshold, seed, bed_files, mlog=False,
    region_filter_p=1, region_filter_n=1, genome_control=False, db=None):
    """
    Run every analysis step on `bed_files`, writing each intermediate
    result to a file whose name starts with `prefix`.

    Files written, in order (later steps read the earlier files):
    <prefix>.args.txt, .acf.txt, .slk.bed, .slk.gc.bed (only when
    `genome_control` is true), .fdr.bed, .regions.bed, .regions-p.bed,
    .regions-t.bed (only when the first input file has 't', 'start' and
    'end' header columns), .manhattan.png (skipped when the plotting
    import fails) and .anno.<db>.bed (only when `db` is given).

    col_num: column selector handed to stepsize, acf, slk and filter.
    step: lag step; computed by stepsize.stepsize when None. It is also
        what peaks.peaks and region_p.region_p receive.
    dist: upper bound of the lags used for the ACF.
    prefix: output prefix; trailing "." characters are removed.
    threshold, seed: passed through to peaks.peaks.
    bed_files: input files; the first one is also used by filter.
    mlog: passed through to acf.acf and region_p.region_p.
    region_filter_p: rows whose column 6 exceeds this are dropped from
        regions-t.bed (the log message calls it region-p).
    region_filter_n: rows whose column 4 is below this are dropped (the
        log message calls it n-probes).
    genome_control: also write .slk.gc.bed and run FDR on that file.
    db: cruzdb genome name used for annotation, or None to skip.
    """
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from cpv import acf, slk, fdr, peaks, region_p, stepsize, filter
    from cpv._common import genome_control_adjust, genomic_control, bediter
    import operator


    if step is None:
        step = stepsize.stepsize(bed_files, col_num)
        print >>sys.stderr, "calculated stepsize as: %i" % step

    lags = range(1, dist, step)
    lags.append(lags[-1] + step)

    prefix = prefix.rstrip(".")
    putative_acf_vals = acf.acf(bed_files, lags, col_num, simple=False,
                                mlog=mlog)
    acf_vals = []
    # go out to the max requested distance but stop at the first
    # autocorrelation below 0.04: that lag is dropped when more than 2
    # lags are already kept, otherwise it is kept as the last one.
    for a in putative_acf_vals:
        # a is ((lmin, lmax), (corr, N))
        if a[1][0] < 0.04 and len(acf_vals) > 2: break
        acf_vals.append(a)
        if a[1][0] < 0.04 and len(acf_vals): break

    # save the arguments that this was called with.
    with open(prefix + ".args.txt", "w") as fh:
        print >>fh, " ".join(sys.argv[1:]) + "\n"
        import datetime
        print >>fh, "date: %s" % datetime.datetime.today()

    with open(prefix + ".acf.txt", "w") as fh:
        acf_vals = acf.write_acf(acf_vals, fh)
        print >>sys.stderr, "wrote: %s" % fh.name
    # echo the ACF table; the with-block closes the read handle
    with open(prefix + ".acf.txt") as acf_fh:
        print >>sys.stderr, "ACF:\n", acf_fh.read()

    spvals, opvals = [], []
    with open(prefix + ".slk.bed", "w") as fhslk:

        for row in slk.adjust_pvals(bed_files, col_num, acf_vals):
            fhslk.write("%s\t%i\t%i\t%.4g\t%.4g\n" % row)
            opvals.append(row[-2])
            spvals.append(row[-1])

    print >>sys.stderr, "# original lambda: %.2f" % genomic_control(opvals)
    del opvals

    gc_lambda = genomic_control(spvals)
    print >>sys.stderr, "wrote: %s with lambda: %.2f" % (fhslk.name, gc_lambda)

    if genome_control:
        # rebinding fhslk makes the FDR step below read the adjusted file;
        # the with-blocks close both handles even if the adjustment raises.
        with open(prefix + ".slk.gc.bed", "w") as fhslk:
            adj = genome_control_adjust([d['p'] for d in bediter(prefix + ".slk.bed", -1)])
            with open(prefix + ".slk.bed") as slk_in:
                for i, line in enumerate(slk_in):
                    print >>fhslk, "%s\t%.5g" % (line.rstrip("\r\n"), adj[i])
        print >>sys.stderr, "wrote: %s" % fhslk.name

    with open(prefix + ".fdr.bed", "w") as fh:
        for bh, l in fdr.fdr(fhslk.name, -1):
            fh.write("%s\t%.4g\n" % (l.rstrip("\r\n"), bh))
        print >>sys.stderr, "wrote: %s" % fh.name

    fregions = prefix + ".regions.bed"
    with open(fregions, "w") as fh:
        # peaks.peaks is consumed only for its writes to fh
        list(peaks.peaks(prefix + ".fdr.bed", -1, threshold, seed,
            step, fh, operator.le))
    with open(fregions) as regions_in:
        n_regions = sum(1 for _ in regions_in)
    print >>sys.stderr, "wrote: %s (%i regions)" % (fregions, n_regions)

    with open(prefix + ".regions-p.bed", "w") as fh:
        N = 0
        fh.write("#chrom\tstart\tend\tmin_p\tn_probes\tslk_p\tslk_sidak_p\n")
        # use -2 for original, uncorrected p-values in slk.bed
        for region_line, slk_p, slk_sidak_p, sim_p in region_p.region_p(
                               prefix + ".slk.bed",
                               prefix + ".regions.bed", -2,
                               0, step, mlog=mlog):
            fh.write("%s\t%.4g\t%.4g\n" % (region_line, slk_p, slk_sidak_p))
            fh.flush()
            N += int(slk_sidak_p < 0.05)
        print >>sys.stderr, "wrote: %s, (regions with corrected-p < 0.05: %i)" \
                % (fh.name, N)

    regions_bed = fh.name
    header_fh = (gzip.open(bed_files[0]) if bed_files[0].endswith(".gz")
            else open(bed_files[0]))
    try:
        # strip the line ending first: otherwise the last column name
        # keeps its "\n" and never matches 't', 'start' or 'end'.
        header = next(header_fh).rstrip("\r\n").split("\t")
    finally:
        header_fh.close()
    # when the columns are missing, fh stays bound to the regions-p file
    # and the plotting/annotation steps below use that file instead.
    if all(h in header for h in ('t', 'start', 'end')):
        with open(prefix + ".regions-t.bed", "w") as fh:
            N = 0
            for i, toks in enumerate(filter.filter(bed_files[0], regions_bed,
                p_col_name=col_num)):
                # the first row is the header: mark it as a comment line
                if i == 0: toks[0] = "#" + toks[0]
                else:
                    if float(toks[6]) > region_filter_p: continue
                    if int(toks[4]) < region_filter_n: continue
                    N += 1
                print >>fh, "\t".join(toks)
            print >>sys.stderr, ("wrote: %s, (regions with region-p "
                                "< %.3f and n-probes >= %i: %i)") \
                    % (fh.name, region_filter_p, region_filter_n, N)

    try:
        from cpv import manhattan
        regions = manhattan.read_regions(fh.name)

        manhattan.manhattan(prefix + ".slk.bed", 3, prefix.rstrip(".") + ".manhattan.png",
                         False, ['#959899', '#484B4C'], "", False, None,
                         regions=regions, bonferonni=True)
    except ImportError:
        pass # they dont have matplotlib


    if db is not None:
        from cruzdb import Genome
        g = Genome(db)
        lastf = fh.name
        with open(prefix + ".anno.%s.bed" % db, "w") as fh:
            g.annotate(lastf, ("refGene", "cpgIslandExt", "cytoBand"), out=fh,
                    feature_strand=True, parallel=len(spvals) > 500)
        print >>sys.stderr, "wrote: %s annotated with %s" % (fh.name, db)