Beispiel #1
0
def region_p(fpvals, fregions, col_num, nsims, step, mlog=False, z=False):
    """
    yield a combined, sidak-corrected (and optionally simulated) p-value
    for every region in `fregions`.

    This is a generator: nothing below runs until the first item is
    requested.

    fpvals: p-value input handed to `_get_total_coverage`,
            `_get_ps_in_regions`, `_gen_acf` and `bediter`
            (presumably the path of a BED file of p-values).
    fregions: path of the regions file. It is opened here only to check
              that it has at least one line not starting with "#".
    col_num: column selector forwarded to the helpers.
             just use 2 for col_num, but dont need the p from regions.
    nsims: on/off switch for the simulation. When > 0 a simulated
           p-value is computed with `sl_sim`; the number of simulations
           actually run is derived from total_coverage / region_len,
           not from the value of `nsims` itself.
    step: forwarded to `_get_total_coverage` and `_gen_acf`.
    mlog: forwarded to `_gen_acf`.
    z: when true the region p-values are combined with
       `z_score_combine`, otherwise with `stouffer_liptak`.

    yields: [region_line, slk_p, sidak_slk_p, sim_p] where sim_p is the
            string "NA" when `nsims` is not > 0.

    Calls sys.exit() (after a message on stderr) when `fregions` contains
    no non-comment line.
    """
    # Use a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(fregions) as regions_fh:
        has_regions = any(line[0] != "#" for line in regions_fh)
    if not has_regions:
        sys.stderr.write("no regions in %s\n" % (fregions, ))
        sys.exit()

    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(fregions, fpvals, col_num)

    acfs = _gen_acf(region_info, (fpvals,), col_num, step, mlog=mlog)
    # the coverage worker was started above; wait for it before reading
    # the value it produced.
    process.join()
    total_coverage = total_coverage_sync.value

    sys.stderr.write("%i bases used as coverage for sidak correction\n"
                     % (total_coverage, ))

    # Only the simulation consumes the full p-value distribution, so do
    # not read every p-value into memory when no simulation is requested.
    sample_distribution = None
    if nsims > 0:
        sample_distribution = np.array([b["p"] for b in bediter(fpvals,
                                                                col_num)])

    for region_line, region_len, prows in region_info:
        ps = np.array([prow["p"] for prow in prows])
        # Check for an empty region before building the sigma matrix so
        # that no work is done (and nothing can fail) on a region that is
        # going to be skipped anyway.
        if ps.shape[0] == 0:
            sys.stderr.write("bad region: %s\n" % (region_line, ))
            continue

        # gen_sigma expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)

        # calculate the SLK for the region.
        if z:
            region_slk = z_score_combine(ps, sigma)
        else:
            region_slk = stouffer_liptak(ps, sigma)

        if not region_slk["OK"]:
            # report, but still use the p-value that was returned.
            sys.stderr.write("problem with: %s %s\n" % (region_slk, ps))

        slk_p = region_slk["p"]
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)

        result = [region_line, slk_p, sidak_slk_p]

        if nsims > 0:
            # adjust nsims so it's an adjusted p-value.
            q_nsims = int(0.5 + total_coverage / float(region_len))
            sim_p = sl_sim(sigma, ps, q_nsims, sample_distribution)
            result.append(sim_p)
        else:
            result.append("NA")
        yield result
Beispiel #2
0
def slk_chrom(chromlist, lag_max, acfs, z=True):
    """
    calculate the slk for a given chromosome

    Generator. For each (entry, neighbors) pair produced by
    `walk(chromlist, lag_max)` the neighbors' p-values are combined with
    `z_score_combine`, using the sigma matrix that `gen_sigma_matrix`
    builds from the neighbors and `acfs`.

    yields: (chrom, start, end, original p, combined p)

    `z` is accepted but not consulted here; `z_score_combine` is always
    used.
    """
    for center, window in walk(chromlist, lag_max):
        corr = gen_sigma_matrix(window, acfs)
        combined = z_score_combine([nb['p'] for nb in window], corr)
        # NOTE: an alternative showed slightly better performance on
        # simulated data with largish changes: take the min of the
        # original and the smoothed pvalue,
        #   p = 1 - (1 - min(center['p'], combined['p']))**2
        # and yield that p in place of combined['p'].
        yield (center["chrom"], center["start"], center["end"],
               center["p"], combined["p"])
Beispiel #3
0
def slk_chrom(chromlist, lag_max, acfs, z=True):
    """
    calculate the slk for a given chromosome

    Walks `chromlist` with `walk(chromlist, lag_max)` and, for every
    entry, lazily yields a 5-tuple holding the entry's chrom, start, end
    and p followed by the p obtained from `z_score_combine` over the
    p-values of the entry's neighbors.

    The `z` argument is not read by this function.
    """
    own_keys = ("chrom", "start", "end", "p")
    for bed, neighbors in walk(chromlist, lag_max):
        sigma = gen_sigma_matrix(neighbors, acfs)

        neighbor_ps = []
        for neighbor in neighbors:
            neighbor_ps.append(neighbor['p'])

        smoothed = z_score_combine(neighbor_ps, sigma)
        # NOTE: taking the min of the original and the smoothed pvalue,
        #   1 - (1 - min(bed['p'], smoothed['p']))**2
        # showed slightly better performance on simulated data with
        # largish changes, but the plain smoothed p-value is reported.
        yield tuple(bed[key] for key in own_keys) + (smoothed["p"],)
Beispiel #4
0
def slk_chrom(chromlist, lag_max, acfs, z=True):
    """
    calculate the slk for a given chromosome

    For each (entry, neighbors) pair produced by
    `walk(chromlist, lag_max)` the neighbors' p-values are combined with
    `z_score_combine` and one row is written to a structured array.

    returns: (chrom, arr) where chrom is taken from the last entry walked
             and arr has the fields 'start', 'end' (uint32) and 'p',
             'slk_p' (float32).

    raises: ValueError when there is nothing to process, i.e. `chromlist`
            is empty or `walk` produced no entries. Without this check
            the function failed with an UnboundLocalError when looking
            up the chromosome name.

    `z` is accepted but not consulted here; `z_score_combine` is always
    used.
    """
    if len(chromlist) == 0:
        # no entries means no chromosome name to report.
        raise ValueError("slk_chrom: empty chromlist")

    arr = np.empty((len(chromlist), ),
                   dtype=np.dtype([('start', np.uint32), ('end', np.uint32),
                                   ('p', np.float32), ('slk_p', np.float32)]))

    xbed = None
    n_filled = 0
    for xbed, xneighbors in walk(chromlist, lag_max):

        sigma = gen_sigma_matrix(xneighbors, acfs)
        pvals = np.array([g['p'] for g in xneighbors])
        r = z_score_combine(pvals, sigma)
        arr[n_filled] = (xbed["start"], xbed["end"], xbed["p"], r["p"])
        n_filled += 1

    if xbed is None:
        raise ValueError("slk_chrom: walk() produced no entries")

    # np.empty does not initialise its memory, so rows that were never
    # written would hold arbitrary values; drop them. This is a no-op
    # when walk() produced one entry per element of chromlist.
    if n_filled < len(arr):
        arr = arr[:n_filled]
    return xbed['chrom'], arr
Beispiel #5
0
def slk_chrom(chromlist, lag_max, acfs, z=True):
    """
    calculate the slk for a given chromosome

    Every (entry, neighbors) pair from `walk(chromlist, lag_max)` becomes
    one row of a structured array: the entry's start, end and p plus the
    p returned by `z_score_combine` for the neighbors' p-values.

    returns: (chrom, arr); chrom comes from the last entry walked, arr
             has fields 'start', 'end' (uint32), 'p', 'slk_p' (float32).

    raises: ValueError if `chromlist` is empty or `walk` yields nothing.
            This used to surface as an UnboundLocalError on the return
            line.

    The `z` argument is not read by this function.
    """
    if len(chromlist) == 0:
        # without any entry there is no chromosome name to return.
        raise ValueError("slk_chrom: empty chromlist")

    arr = np.empty((len(chromlist),),  dtype=np.dtype([
        ('start', np.uint32),
        ('end', np.uint32),
        ('p', np.float32),
        ('slk_p', np.float32)]))

    last_bed = None
    n_filled = 0
    for n_filled, (xbed, xneighbors) in enumerate(walk(chromlist, lag_max),
                                                  1):

        sigma = gen_sigma_matrix(xneighbors, acfs)
        pvals = np.array([g['p'] for g in xneighbors])
        r = z_score_combine(pvals, sigma)
        # n_filled counts from 1, rows are indexed from 0.
        arr[n_filled - 1] = (xbed["start"], xbed["end"], xbed["p"], r["p"])
        last_bed = xbed

    if last_bed is None:
        raise ValueError("slk_chrom: walk() produced no entries")

    # rows never assigned still contain whatever np.empty found in
    # memory; return only the rows that were filled in. Nothing is
    # trimmed when walk() produced one entry per element of chromlist.
    if n_filled < len(arr):
        arr = arr[:n_filled]
    return last_bed['chrom'], arr
Beispiel #6
0
def slk_chrom(chromlist, lag_max, acfs, z=False):
    """
    calculate the slk for a given chromosome

    Generator. For each (entry, neighbors) pair from
    `walk(chromlist, lag_max)` the neighbors' p-values are combined with
    `z_score_combine` when `z` is true and with `stouffer_liptak`
    otherwise, and (chrom, start, end, original p, combined p) is
    yielded.

    When the combiner's result is not flagged "OK", a diagnostic for
    that entry is written to stderr, for at most 20 entries. The
    diagnostic is written after the yield, i.e. once the consumer asks
    for the following item.
    """
    reported = 0
    for bed, neighbors in walk(chromlist, lag_max):

        sigma = gen_sigma_matrix(neighbors, acfs)
        pvals = [nb['p'] for nb in neighbors]
        # z is True/False and picks the combining method.
        combine = z_score_combine if z else stouffer_liptak
        res = combine(pvals, sigma)

        yield (bed["chrom"], bed["start"], bed["end"], bed["p"],
                res["p"])

        # nothing to report, or the reporting limit was already reached.
        if res["OK"] or reported >= 20:
            continue

        print >>sys.stderr, "# non-invertible %s\t%i\t%i" % \
                (bed["chrom"], bed["start"], bed["end"])
        # only the first 10 p-values are shown.
        shown = ",".join(str(p) for p in pvals[:10])
        print >>sys.stderr, "# pvals:", shown + "..."
        reported += 1
        if reported == 20:
            print >>sys.stderr, "not reporting further un-invertibles"