def sl_sim(sigma, ps, nsims, sample_distribution=None):
    """Estimate a simulation-based p-value for a set of p-values.

    The observed ``ps`` are combined with ``stouffer_liptak``; the same
    combination is then applied to simulated rows produced by
    ``gen_correlated`` and the fraction of simulated results that are at
    least as small as the observed one is returned.

    NOTE(review): this file defines ``sl_sim`` a second time further down;
    the later definition is the one bound at import time.

    Args:
        sigma: forwarded to ``stouffer_liptak`` and ``gen_correlated``
            (presumably the correlation matrix of ``ps`` -- confirm).
        ps: observed p-values to combine.
        nsims: number of simulated rows requested; also the denominator of
            the returned fraction.
        sample_distribution: forwarded unchanged to ``gen_correlated``.

    Returns:
        float: ``(# simulated p <= observed p) / nsims``.

    Raises:
        ZeroDivisionError: if the combination of a simulated row is flagged
            as not OK, or if ``nsims`` is 0.
    """
    n_batches = 10
    print("nsims:", nsims, file=sys.stderr)
    observed_p = stouffer_liptak(ps, sigma)["p"]

    # Split the work into integer-sized batches whose sizes add up to nsims.
    # True division would hand a float count to gen_correlated on Python 3,
    # and plain floor division would silently drop ``nsims % n_batches``
    # simulations while still dividing by nsims below.
    # (assumes gen_correlated yields one row per requested simulation --
    # TODO confirm against its definition)
    per_batch, leftover = divmod(nsims, n_batches)

    n_as_extreme = 0
    # TODO parallelize here.
    for batch in range(n_batches):
        batch_size = per_batch + (1 if batch < leftover else 0)
        if not batch_size:
            continue
        for prow in gen_correlated(sigma, batch_size, sample_distribution):
            sim = stouffer_liptak(prow, sigma)
            if not sim["OK"]:
                # Same exception type the original ``1/0`` produced, but
                # raised explicitly and with an explanation.
                raise ZeroDivisionError(
                    "stouffer_liptak failed on a simulated row")
            if sim["p"] <= observed_p:
                n_as_extreme += 1

    return n_as_extreme / float(nsims)
def sl_sim(sigma, ps, nsims, sample_distribution=None):
    """Estimate a simulation-based p-value for a set of p-values.

    The observed ``ps`` are combined with ``stouffer_liptak``; the same
    combination is then applied to simulated rows produced by
    ``gen_correlated`` and the fraction of simulated results that are at
    least as small as the observed one is returned.

    Args:
        sigma: forwarded to ``stouffer_liptak`` and ``gen_correlated``
            (presumably the correlation matrix of ``ps`` -- confirm).
        ps: observed p-values to combine.
        nsims: number of simulated rows requested; also the denominator of
            the returned fraction.
        sample_distribution: forwarded unchanged to ``gen_correlated``.

    Returns:
        float: ``(# simulated p <= observed p) / nsims``.

    Raises:
        ZeroDivisionError: if the combination of a simulated row is flagged
            as not OK, or if ``nsims`` is 0.
    """
    n_batches = 10
    print("nsims:", nsims, file=sys.stderr)
    w0 = stouffer_liptak(ps, sigma)["p"]

    # ``nsims / 10`` is true division on Python 3 and would pass a float
    # count to gen_correlated. Use integer batch sizes instead, and spread
    # the remainder over the first batches so that exactly nsims rows are
    # requested -- matching the denominator used in the return value.
    # (assumes gen_correlated yields one row per requested simulation --
    # TODO confirm against its definition)
    base_size, remainder = divmod(nsims, n_batches)

    hits = 0
    # TODO parallelize here.
    for i in range(n_batches):
        size = base_size + 1 if i < remainder else base_size
        if size == 0:
            continue
        for prow in gen_correlated(sigma, size, sample_distribution):
            s = stouffer_liptak(prow, sigma)
            if not s["OK"]:
                # Explicit form of the original ``1 / 0`` crash; the
                # exception type is kept so existing handlers still match.
                raise ZeroDivisionError(
                    "stouffer_liptak failed on a simulated row")
            if s["p"] <= w0:
                hits += 1

    return hits / float(nsims)
def region_p(fpvals, fregions, col_num, nsims, step, mlog=False, z=False):
    """Yield combined and Sidak-corrected p-values for each region.

    This is a generator, so nothing (including the empty-regions check and
    the call to ``sys.exit``) runs until the first item is requested.

    Args:
        fpvals: p-value input handed to ``_get_total_coverage``,
            ``_get_ps_in_regions``, ``_gen_acf`` and ``bediter``
            (presumably a path to a BED-like file -- confirm).
        fregions: path of the regions file; lines starting with ``#`` are
            treated as comments when checking whether it is empty.
        col_num: column selector forwarded to the helpers above.
        nsims: simulations are run only when this is > 0. The number of
            simulations actually requested per region is derived from
            ``total_coverage / region_len``, not from this value.
        step: forwarded to ``_get_total_coverage`` and ``_gen_acf``.
        mlog: forwarded to ``_gen_acf``.
        z: when true, combine with ``z_score_combine`` instead of
            ``stouffer_liptak``.

    Yields:
        list: ``[region_line, slk_p, sidak_slk_p, sim_p]`` where ``sim_p``
        is the result of ``sl_sim`` or the string ``"NA"`` when
        ``nsims <= 0``.

    Raises:
        SystemExit: if ``fregions`` contains no non-comment lines.
    """
    # just use 2 for col_num, but dont need the p from regions.

    # Close the regions file deterministically instead of leaking the handle.
    with open(fregions) as regions_fh:
        has_regions = any(line[0] != "#" for line in regions_fh)
    if not has_regions:
        print("no regions in %s" % (fregions, ), file=sys.stderr)
        sys.exit()

    # _get_total_coverage hands back a process-like object plus a shared
    # value; the region/ACF work below happens before we join on it.
    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(fregions, fpvals, col_num)

    acfs = _gen_acf(region_info, (fpvals,), col_num, step, mlog=mlog)
    process.join()
    total_coverage = total_coverage_sync.value

    # regions first and then create ACF for the longest one.
    print("%i bases used as coverage for sidak correction" % total_coverage,
          file=sys.stderr)
    sample_distribution = np.array([b["p"] for b in bediter(fpvals,
                                                            col_num)])
    for region_line, region_len, prows in region_info:
        # gen_sigma expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)
        ps = np.array([prow["p"] for prow in prows])
        if ps.shape[0] == 0:
            print("bad region", region_line, file=sys.stderr)
            continue

        # calculate the SLK for the region.
        if z:
            region_slk = z_score_combine(ps, sigma)
        else:
            region_slk = stouffer_liptak(ps, sigma)

        # Best effort: a failed combination is reported but its "p" is
        # still used below.
        if not region_slk["OK"]:
            print("problem with:", region_slk, ps, file=sys.stderr)

        slk_p = region_slk["p"]
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)
        result = [region_line, slk_p, sidak_slk_p]

        # corroborate the combined p-value by simulation when requested.
        if nsims > 0:
            # adjust nsims so it's an adjusted p-value.
            q_nsims = int(0.5 + total_coverage / float(region_len))
            # NOTE(review): sl_sim is used here regardless of ``z``.
            sim_p = sl_sim(sigma, ps, q_nsims, sample_distribution)
            result.append(sim_p)
        else:
            result.append("NA")
        yield result
def slk_chrom(chromlist, lag_max, acfs, stringent=False):
    """
    calculate the slk for a given chromosome

    For every ``(entry, neighbors)`` pair produced by
    ``walk(chromlist, lag_max)`` the neighbors' p-values are combined with
    ``stouffer_liptak`` using the matrix from ``gen_sigma_matrix``.

    Args:
        chromlist: forwarded to ``walk`` (presumably the bed dicts of one
            chromosome -- confirm).
        lag_max: forwarded to ``walk``.
        acfs: forwarded to ``gen_sigma_matrix``.
        stringent: passed to ``stouffer_liptak`` as ``correction``.

    Yields:
        tuple: ``(chrom, start, end, original_p, combined_p)``.

    Side effects:
        Up to 20 results flagged as not OK are reported on stderr. The
        report is written after the corresponding tuple has been yielded,
        i.e. when the consumer asks for the next item.
    """
    max_reports = 20
    n_bad = 0
    for xbed, xneighbors in walk(chromlist, lag_max):

        sigma = gen_sigma_matrix(xneighbors, acfs)
        pvals = [g['p'] for g in xneighbors]
        # stringent is True/False
        r = stouffer_liptak(pvals, sigma, correction=stringent)

        yield (xbed["chrom"], xbed["start"], xbed["end"], xbed["p"],
               r["p"])
        if not r["OK"] and n_bad < max_reports:
            print("# non-invertible %s\t%i\t%i" %
                  (xbed["chrom"], xbed["start"], xbed["end"]),
                  file=sys.stderr)
            print("# pvals:", ",".join(map(str, pvals[:10])) + "...",
                  file=sys.stderr)
            n_bad += 1
            if n_bad == max_reports:
                print("not reporting further un-invertibles",
                      file=sys.stderr)