def region_p(fpvals, fregions, col_num, nsims, step, mlog=False, z=False):
    """Yield a combined and Sidak-corrected p-value for each region.

    This is a generator, so nothing -- including the empty-regions check --
    runs until the first item is requested.

    Args:
        fpvals: p-value input handed to ``_get_total_coverage``,
            ``_get_ps_in_regions``, ``_gen_acf`` and ``bediter``
            (presumably a BED-like file path -- confirm against those
            helpers).
        fregions: path of the regions file. It must contain at least one
            line that does not start with ``#``.
        col_num: column selector forwarded to the helpers reading *fpvals*.
        nsims: simulation switch. Any value > 0 enables the simulated
            p-value; the number of simulations actually run is derived per
            region from the coverage, not taken from this value.
        step: forwarded to ``_get_total_coverage`` and ``_gen_acf``.
        mlog: forwarded to ``_gen_acf``.
        z: combine with ``z_score_combine`` when true, otherwise with
            ``stouffer_liptak``.

    Yields:
        ``[region_line, slk_p, sidak_slk_p, sim_p]`` where ``sim_p`` is the
        string ``"NA"`` when *nsims* is not positive.

    Raises:
        SystemExit: with status 1 when *fregions* holds no region lines.
    """
    # Use a context manager so the regions file is closed deterministically;
    # any() also stops at the first real region instead of counting them all.
    with open(fregions) as regions_fh:
        has_regions = any(not line.startswith("#") for line in regions_fh)
    if not has_regions:
        # sys.stderr.write works whether or not ``print`` is a function here.
        sys.stderr.write("no regions in %s\n" % (fregions, ))
        # This is an error condition, so report a non-zero exit status.
        sys.exit(1)

    # ``process`` is started by the helper and joined below, after the
    # region lookup and ACF generation, before the shared value is read.
    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(fregions, fpvals, col_num)
    acfs = _gen_acf(region_info, (fpvals,), col_num, step, mlog=mlog)
    process.join()
    total_coverage = total_coverage_sync.value

    sys.stderr.write("%i bases used as coverage for sidak correction\n"
                     % total_coverage)

    # The sample distribution is only consumed by the simulation, so skip the
    # extra pass over the p-values when no simulation was requested.
    sample_distribution = None
    if nsims > 0:
        sample_distribution = np.array(
            [b["p"] for b in bediter(fpvals, col_num)])

    combine = z_score_combine if z else stouffer_liptak

    for region_line, region_len, prows in region_info:
        ps = np.array([prow["p"] for prow in prows])
        # Guard first: there is nothing to combine for an empty region, so
        # do not build a sigma matrix for it.
        if ps.shape[0] == 0:
            sys.stderr.write("bad region %r\n" % (region_line, ))
            continue

        # gen_sigma_matrix expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)

        # Calculate the SLK (combined p-value) for the region.
        region_slk = combine(ps, sigma)
        if not region_slk["OK"]:
            # Report, but still emit the row with whatever p was returned.
            sys.stderr.write("problem with: %s %s\n" % (region_slk, ps))

        slk_p = region_slk["p"]
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)
        result = [region_line, slk_p, sidak_slk_p]

        if nsims > 0:
            # Scale the simulation count to the number of region-sized
            # windows in the coverage so the result is an adjusted p-value.
            q_nsims = int(0.5 + total_coverage / float(region_len))
            result.append(sl_sim(sigma, ps, q_nsims, sample_distribution))
        else:
            result.append("NA")

        yield result
def region_p(fpvals, fregions, col_num, step, z=True):
    """Yield a combined and Sidak-corrected p-value for each region.

    This is a generator, so no work is done until the first item is
    requested.

    Args:
        fpvals: p-value input handed to ``_get_total_coverage``,
            ``_get_ps_in_regions`` and ``_gen_acf`` (presumably a BED-like
            file path -- confirm against those helpers).
        fregions: regions input passed to ``read_regions``.
        col_num: column selector forwarded to the helpers reading *fpvals*.
        step: forwarded to ``_get_total_coverage`` and ``_gen_acf``.
        z: combine with ``z_score_combine`` when true, otherwise with
            ``stouffer_liptak``.

    Yields:
        ``[region, slk_p, sidak_slk_p, "NA"]`` where ``region`` is the
        tab-joined region key. The last column is always the string
        ``"NA"`` because no simulation is run.
    """
    tree = read_regions(fregions)

    # ``process`` is started by the helper and joined below, after the
    # region lookup and ACF generation, before the shared value is read.
    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(tree, fpvals, col_num)
    acfs = _gen_acf(region_info, (fpvals, ), col_num, step)
    process.join()
    total_coverage = total_coverage_sync.value

    print("%i bases used as coverage for sidak correction" % total_coverage,
          file=sys.stderr)

    # No sample distribution is built here: nothing in this function
    # consumes one, so re-reading every p-value would be wasted work.
    combine = z_score_combine if z else stouffer_liptak

    for region, prows in region_info.items():
        ps = np.array([prow["p"] for prow in prows])
        # Guard first: there is nothing to combine for an empty region, so
        # do not build a sigma matrix for it.
        if ps.shape[0] == 0:
            print("bad region", region, file=sys.stderr)
            continue

        # gen_sigma_matrix expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)

        # Calculate the SLK (combined p-value) for the region.
        region_slk = combine(ps, sigma)
        if not region_slk["OK"]:
            # Report, but still emit the row with whatever p was returned.
            print("problem with:", region_slk, ps, file=sys.stderr)

        slk_p = region_slk["p"]
        # region[1] and region[2] are used as the start/end of the region;
        # their difference is the length passed to the Sidak correction.
        region_len = int(region[2]) - int(region[1])
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)

        yield ["\t".join(region), slk_p, sidak_slk_p, "NA"]
def region_p(fpvals, fregions, col_num, step, z=True):
    """Generate one result row (combined p, Sidak-corrected p) per region.

    The function is a generator; the helpers below are only invoked once
    iteration starts.

    Args:
        fpvals: p-value input passed on to ``_get_total_coverage``,
            ``_get_ps_in_regions`` and ``_gen_acf`` (presumably a BED-like
            file path -- confirm against those helpers).
        fregions: regions input passed to ``read_regions``.
        col_num: column selector forwarded to the helpers reading *fpvals*.
        step: forwarded to ``_get_total_coverage`` and ``_gen_acf``.
        z: when true the p-values are combined with ``z_score_combine``,
            otherwise with ``stouffer_liptak``.

    Yields:
        ``[region, slk_p, sidak_slk_p, "NA"]`` with ``region`` being the
        tab-joined region key; the trailing ``"NA"`` is constant because no
        simulation is performed.
    """
    tree = read_regions(fregions)

    # The coverage helper hands back something joinable plus a shared value;
    # the value is only read after join() further down.
    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(tree, fpvals, col_num)
    acfs = _gen_acf(region_info, (fpvals,), col_num, step)
    process.join()
    total_coverage = total_coverage_sync.value

    # sys.stderr.write is used for diagnostics because it behaves the same
    # whether or not ``print`` is a function in this module.
    sys.stderr.write("%i bases used as coverage for sidak correction\n"
                     % total_coverage)

    # Nothing below consumes a sample distribution, so the p-values are not
    # read a second time to build one.
    combine = z_score_combine if z else stouffer_liptak

    # .items() is available on every Python version; .iteritems() is not.
    for region, prows in region_info.items():
        ps = np.array([prow["p"] for prow in prows])
        # Skip empty regions before doing any matrix work for them.
        if ps.shape[0] == 0:
            sys.stderr.write("bad region %r\n" % (region, ))
            continue

        # gen_sigma_matrix expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)

        # Calculate the SLK (combined p-value) for the region.
        region_slk = combine(ps, sigma)
        if not region_slk["OK"]:
            # Report, but still emit the row with whatever p was returned.
            sys.stderr.write("problem with: %s %s\n" % (region_slk, ps))

        slk_p = region_slk["p"]
        # region[1] and region[2] are used as the start/end of the region;
        # their difference is the length passed to the Sidak correction.
        region_len = int(region[2]) - int(region[1])
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)

        yield ["\t".join(region), slk_p, sidak_slk_p, "NA"]