def sl_sim(sigma, ps, nsims, sample_distribution=None):
    """Estimate a simulation-based p-value for a combined p-value.

    The observed combined p-value is ``stouffer_liptak(ps, sigma)["p"]``.
    ``nsims`` rows are drawn from ``gen_correlated`` and each row is
    combined the same way; the return value is the fraction of simulated
    rows whose combined p-value is <= the observed one.

    Parameters:
        sigma: matrix handed unchanged to ``stouffer_liptak`` and
            ``gen_correlated``.
        ps: observed p-values for the region.
        nsims: total number of simulated rows (a positive integer).
        sample_distribution: forwarded to ``gen_correlated``; may be None.

    Returns:
        float: ``(number of simulated rows with p <= observed p) / nsims``.

    Raises:
        ZeroDivisionError: if ``stouffer_liptak`` reports ``OK`` as false
            for a simulated row, or if ``nsims`` is 0.
    """
    n_batches = 10
    n_extreme = 0
    sys.stderr.write("nsims: %s\n" % (nsims,))
    w0 = stouffer_liptak(ps, sigma)["p"]
    # Split the work into at most `n_batches` chunks whose sizes sum to
    # exactly `nsims`.  The previous `nsims/10` per chunk dropped the
    # remainder (and ran *zero* simulations when nsims < 10) while still
    # dividing by `nsims` below, which biased the estimate towards 0.
    base, extra = divmod(int(nsims), n_batches)
    # TODO parallelize here.
    for i in range(n_batches):
        batch_size = base + (1 if i < extra else 0)
        if not batch_size:
            continue
        for prow in gen_correlated(sigma, batch_size, sample_distribution):
            s = stouffer_liptak(prow, sigma)
            if not s["OK"]:
                # Historically signalled with a bare `1/0`; the exception
                # type is kept so existing handlers still match, but it
                # now carries an explanation.
                raise ZeroDivisionError(
                    "stouffer_liptak failed on a simulated row: %r" % (s,))
            if s["p"] <= w0:
                n_extreme += 1
    return n_extreme / float(nsims)
def sl_sim(sigma, ps, nsims, sample_distribution=None):
    """Estimate a simulation-based p-value for a combined p-value.

    The observed combined p-value is ``stouffer_liptak(ps, sigma)["p"]``.
    ``nsims`` rows are drawn from ``gen_correlated`` and each row is
    combined the same way; the return value is the fraction of simulated
    rows whose combined p-value is <= the observed one.

    Parameters:
        sigma: matrix handed unchanged to ``stouffer_liptak`` and
            ``gen_correlated``.
        ps: observed p-values for the region.
        nsims: total number of simulated rows (a positive integer).
        sample_distribution: forwarded to ``gen_correlated``; may be None.

    Returns:
        float: ``(number of simulated rows with p <= observed p) / nsims``.

    Raises:
        ZeroDivisionError: if ``stouffer_liptak`` reports ``OK`` as false
            for a simulated row, or if ``nsims`` is 0.
    """
    n_batches = 10
    n_extreme = 0
    print("nsims:", nsims, file=sys.stderr)
    w0 = stouffer_liptak(ps, sigma)["p"]
    # Integer batch sizes that sum to exactly `nsims`.  `nsims / 10` is a
    # float under true division, and even as floor division it dropped the
    # remainder (running zero simulations for nsims < 10) while the result
    # was still divided by `nsims`.
    base, extra = divmod(int(nsims), n_batches)
    # TODO parallelize here.
    for i in range(n_batches):
        batch_size = base + (1 if i < extra else 0)
        if not batch_size:
            continue
        for prow in gen_correlated(sigma, batch_size, sample_distribution):
            s = stouffer_liptak(prow, sigma)
            if not s["OK"]:
                # Historically signalled with a bare `1 / 0`; the exception
                # type is kept so existing handlers still match, but it
                # now carries an explanation.
                raise ZeroDivisionError(
                    "stouffer_liptak failed on a simulated row: %r" % (s,))
            if s["p"] <= w0:
                n_extreme += 1
    return n_extreme / float(nsims)
def region_p(fpvals, fregions, col_num, nsims, step, mlog=False, z=False):
    """Yield a combined (and corrected) p-value for every region.

    For each entry produced by ``_get_ps_in_regions`` the p-values inside
    the region are combined with ``z_score_combine`` (when ``z`` is true)
    or ``stouffer_liptak`` (otherwise), the result is passed through
    ``sidak`` together with the region length and the total coverage, and
    optionally corroborated by simulation via ``sl_sim``.

    Parameters:
        fpvals: path of the p-value file (read via ``bediter`` and the
            private helpers).
        fregions: path of the regions file; lines starting with ``#`` are
            treated as comments when checking that it is non-empty.
        col_num: p-value column passed on to the helpers.
        nsims: simulations are run only when this is > 0; the number
            actually used per region is derived from the coverage.
        step: passed on to ``_get_total_coverage`` and ``_gen_acf``.
        mlog: forwarded to ``_gen_acf``.
        z: choose ``z_score_combine`` instead of ``stouffer_liptak``.

    Yields:
        list: ``[region_line, slk_p, sidak_slk_p, sim_p]`` where ``sim_p``
        is the string ``"NA"`` when ``nsims <= 0``.

    Side effects:
        Writes progress/diagnostic lines to stderr and calls
        ``sys.exit()`` when the regions file has no non-comment line.
    """
    # just use 2 for col_num, but dont need the p from regions.
    # The handle is closed deterministically and scanning stops at the
    # first non-comment line.
    with open(fregions) as regions_fh:
        has_regions = any(line[0] != "#" for line in regions_fh)
    if not has_regions:
        sys.stderr.write("no regions in %s\n" % (fregions,))
        sys.exit()

    process, total_coverage_sync = _get_total_coverage(fpvals, col_num, step)
    region_info = _get_ps_in_regions(fregions, fpvals, col_num)
    acfs = _gen_acf(region_info, (fpvals,), col_num, step, mlog=mlog)
    process.join()
    total_coverage = total_coverage_sync.value

    # regions first and then create ACF for the longest one.
    sys.stderr.write("%i bases used as coverage for sidak correction\n"
                     % (total_coverage,))

    # Only the simulation branch consumes the empirical distribution, so
    # skip loading every p-value when no simulations were requested.
    if nsims > 0:
        sample_distribution = np.array(
            [b["p"] for b in bediter(fpvals, col_num)])
    else:
        sample_distribution = None

    for region_line, region_len, prows in region_info:
        ps = np.array([prow["p"] for prow in prows])
        if ps.shape[0] == 0:
            # Emitted as a tuple repr, matching the historical output.
            sys.stderr.write("%s\n" % (("bad region", region_line),))
            continue

        # gen_sigma expects a list of bed dicts.
        sigma = gen_sigma_matrix(prows, acfs)

        # calculate the SLK for the region.
        if z:
            region_slk = z_score_combine(ps, sigma)
        else:
            region_slk = stouffer_liptak(ps, sigma)

        if not region_slk["OK"]:
            # Reported but not fatal: the region is still yielded.
            sys.stderr.write("problem with: %s %s\n" % (region_slk, ps))

        slk_p = region_slk["p"]
        sidak_slk_p = sidak(slk_p, region_len, total_coverage)
        result = [region_line, slk_p, sidak_slk_p]

        if nsims > 0:
            # adjust nsims so it's an adjusted p-value.
            q_nsims = int(0.5 + total_coverage / float(region_len))
            sim_p = sl_sim(sigma, ps, q_nsims, sample_distribution)
            result.append(sim_p)
        else:
            result.append("NA")
        yield result
def slk_chrom(chromlist, lag_max, acfs, stringent=False):
    """
    calculate the slk for a given chromosome

    For every ``(xbed, xneighbors)`` pair produced by
    ``walk(chromlist, lag_max)`` the neighbours' p-values are combined with
    ``stouffer_liptak`` using the sigma matrix from ``gen_sigma_matrix``.

    Parameters:
        chromlist: passed to ``walk``.
        lag_max: passed to ``walk``.
        acfs: passed to ``gen_sigma_matrix``.
        stringent: forwarded to ``stouffer_liptak`` as ``correction``.

    Yields:
        tuple: ``(chrom, start, end, original_p, combined_p)``.

    Side effects:
        The first 20 results whose ``OK`` flag is false are reported on
        stderr, followed by a single notice that reporting has stopped.
    """
    max_reports = 20
    n_bad = 0
    for xbed, xneighbors in walk(chromlist, lag_max):
        sigma = gen_sigma_matrix(xneighbors, acfs)
        pvals = [g['p'] for g in xneighbors]
        # stringent is True/False
        r = stouffer_liptak(pvals, sigma, correction=stringent)
        yield (xbed["chrom"], xbed["start"], xbed["end"], xbed["p"], r["p"])

        if r["OK"] or n_bad >= max_reports:
            continue
        # sys.stderr.write is used instead of the Python 2-only
        # `print >>sys.stderr` form; the emitted text is unchanged.
        sys.stderr.write("# non-invertible %s\t%i\t%i\n"
                         % (xbed["chrom"], xbed["start"], xbed["end"]))
        sys.stderr.write("# pvals: %s...\n"
                         % ",".join(map(str, pvals[:10])))
        n_bad += 1
        if n_bad == max_reports:
            sys.stderr.write("not reporting further un-invertibles\n")