def test_masked_windowed_divergence(self):
    """Compare masked 10-wide windows against unmasked 5-wide windows.

    Divergence is first computed without a mask in windows of size 5 and
    every second window is kept.  The same values are then expected from
    windows of size 10 when ``is_accessible`` is a mask made of
    alternating runs of five ``True`` and five ``False`` flags.
    """
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    # Columns 0-1 form the first population, columns 2-3 the second.
    counts_a = haplotypes.take([0, 1], axis=1).count_alleles()
    counts_b = haplotypes.take([2, 3], axis=1).count_alleles()
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # Five True flags followed by five False flags, repeated three times.
    one_period = np.repeat(np.array([True, False]), 5)
    accessible = np.tile(one_period, 3)

    fine_dxy, _windows, _n_bases, _counts = allel.windowed_divergence(
        positions, counts_a, counts_b, size=5, start=1, stop=31
    )
    expect = fine_dxy[::2]

    actual, _windows, _n_bases, _counts = allel.windowed_divergence(
        positions, counts_a, counts_b,
        size=10, start=1, stop=31, is_accessible=accessible,
    )
    assert_array_almost_equal(expect, actual)
def pairDxy(c, chrsize, ac_subpops, pos, pop2color, plot=False, blenw=10000, nwindow=100):
    """Compute Dxy for every pair of subpopulations in ``ac_subpops``.

    For each pair, sites are kept only when the summed allele counts of
    the two subpopulations are segregating and their ``max_allele()`` is 1.
    Only the first two allele-count columns of the kept sites are used.

    Parameters
    ----------
    c
        Passed through to ``plot_dxy`` only.
    chrsize
        Used as ``stop`` for the windows and to derive the plotting
        window size, ``int(chrsize / nwindow)``.
    ac_subpops : dict
        Maps a subpopulation key to its allele counts.
    pos
        Positions; indexed with the boolean site filter.
    pop2color
        Passed through to ``plot_dxy`` only.
    plot : bool
        When true, ``plot_dxy`` is called once after all pairs are done.
    blenw
        Window size of the pass whose per-window values feed ``jackknife``.
    nwindow
        Number of windows the chromosome is divided into for plotting.

    Returns
    -------
    dict
        ``"{x}-{y}"`` -> ``(dxy_m, dxy_se, dxy4plot)`` where ``dxy_m`` and
        ``dxy_se`` are the first two values returned by ``jackknife`` and
        ``dxy4plot`` holds the first two elements of the plotting-window
        result of ``allel.windowed_divergence``.
    """
    dxydict = {}
    windlen = int(chrsize / nwindow)

    for x, y in combinations(ac_subpops.keys(), 2):
        pair_key = "{}-{}".format(x, y)

        # Site filter built from the combined counts of both subpopulations.
        # (To use all sites instead, skip this filter and pass the
        # unfiltered counts and positions below.)
        acu = ac_subpops[x] + ac_subpops[y]
        flt = acu.is_segregating() & (acu.max_allele() == 1)
        print("{} retaining {} SNPs".format(pair_key, np.count_nonzero(flt)))

        posflt = pos[flt]
        ac1 = allel.AlleleCountsArray(ac_subpops[x].compress(flt, axis=0)[:, :2])
        ac2 = allel.AlleleCountsArray(ac_subpops[y].compress(flt, axis=0)[:, :2])

        # Pass 1: windows of size ``blenw``, summarised by jackknife.
        block_result = allel.windowed_divergence(
            posflt, ac1, ac2, size=blenw, start=1, stop=chrsize
        )
        dxy_m, dxy_se, *_ = jackknife(block_result[0])

        # Pass 2: windows of size ``windlen``, kept for plotting.
        plot_result = allel.windowed_divergence(
            posflt, ac1, ac2, size=windlen, start=1, stop=chrsize
        )
        dxy4plot = (plot_result[0], plot_result[1])

        dxydict[pair_key] = (dxy_m, dxy_se, dxy4plot)

    if plot:
        plot_dxy(dxydict, pop2color, list(ac_subpops.keys()), c, chrsize)
    return dxydict
def dxy(p1, pos, gt, win_size, length_bp):
    """Return windowed pairwise divergence between two populations.

    Parameters
    ----------
    p1 : int
        size of subpop1.
    pos
        Forwarded unchanged to ``get_ac_seg``
        (presumably variant positions -- confirm against ``get_ac_seg``).
    gt
        Forwarded unchanged to ``get_ac_seg``
        (presumably genotype data -- confirm against ``get_ac_seg``).
    win_size
        Window size, passed as ``size`` to ``allel.windowed_divergence``.
    length_bp
        Passed as ``stop`` to ``allel.windowed_divergence``; windows
        start at position 1.

    Returns
    -------
    dxy_win
        First element of the ``allel.windowed_divergence`` result.
    """
    counts_a, counts_b, seg_positions = get_ac_seg(p1, pos, gt)
    windowed = allel.windowed_divergence(
        seg_positions,
        counts_a,
        counts_b,
        size=win_size,
        start=1,
        stop=length_bp,
    )
    return windowed[0]
def test_windowed_divergence(self):
    """Check windowed divergence for two haplotypes per population.

    Windows of size 10 from position 1 to 31 are compared against
    hand-computed expected values.
    """
    # Simplest case: two haplotypes in each population.
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    counts_a = haplotypes.take([0, 1], axis=1).count_alleles()
    counts_b = haplotypes.take([2, 3], axis=1).count_alleles()
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # Mean pairwise divergence per site:
    #   [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    # The expected per-window values below are written as
    # (window sum) / (window width).
    first_window = (6 / 4) / 10
    second_window = (9 / 4) / 10
    third_window = 0 / 11
    expect = [first_window, second_window, third_window]

    actual, _windows, _n_bases, _counts = allel.windowed_divergence(
        positions, counts_a, counts_b, size=10, start=1, stop=31
    )
    assert_array_almost_equal(expect, actual)
size=winsize, start=start, stop=stop, step=int(winsize / 2)) new_dat = format_results(stat=tajD, stat_name="tajD", chrom=chrom, windows=windows, nvar=counts, pop=pop) df_list.append(new_dat) if 'dxy' in args.s and args.p2 != "None": dxy, windows, n_bases, counts = allel.windowed_divergence( pos, ac, ac2, size=winsize, start=start, stop=stop, step=int(winsize / 2)) new_dat = format_results(stat=dxy, stat_name="dxy", chrom=chrom, windows=windows, nvar=counts, pop=pop) df_list.append(new_dat) if 'FD' in args.s and args.p2 != "None": FD, windows, n_bases, counts = allel.windowed_df( pos, ac, ac2,
# # Maybe if we look at Dxy we see something clearer? # In[28]: clu_varbool = np.logical_and(oc_genvars_seg["POS"] > loc_start - 1e5, oc_genvars_seg["POS"] <= loc_end + 1e5) clu_ehh_pos = oc_genvars_seg["POS"].subset(sel0=clu_varbool) size = 5000 step = 1000 # divergence between col-296G and gam-296G on 0 background dxy_div_col02_gam02 = allel.windowed_divergence( ac1=oc_genalco_sps_seg_inv_gty["col_0_2"].subset(sel0=clu_varbool), ac2=oc_genalco_sps_seg_inv_gty["gam_0_2"].subset(sel0=clu_varbool), pos=oc_genvars_seg["POS"].subset(sel0=clu_varbool), size=size, step=step) # divergence between col-296G and gam-wt on 0 background dxy_div_col02_gam00 = allel.windowed_divergence( ac1=oc_genalco_sps_seg_inv_gty["col_0_2"].subset(sel0=clu_varbool), ac2=oc_genalco_sps_seg_inv_gty["gam_0_0"].subset(sel0=clu_varbool), pos=oc_genvars_seg["POS"].subset(sel0=clu_varbool), size=size, step=step) # divergence between col-wt and gam-296G on 0 background dxy_div_col00_gam02 = allel.windowed_divergence( ac1=oc_genalco_sps_seg_inv_gty["col_0_0"].subset(sel0=clu_varbool), ac2=oc_genalco_sps_seg_inv_gty["gam_0_2"].subset(sel0=clu_varbool),
def win_pi_sims(path, neut_mut, n_pops, n_sims, T, win_size, L, N):
    """Compute windowed pi, Tajima's D, Dxy and Fst for simulations and pickle them.

    For every entry of ``T`` and every simulation index, exactly one tree
    file matching
    ``path + str(T[t]) + "N_sim_" + str(i) + "_RAND_*[0-9]_overlaid.trees"``
    must exist.  It is loaded with ``pyslim``, simplified, converted to
    allele counts by ``ac_from_ts`` and summarised in windows of
    ``win_size`` from position 1 to ``L``.

    Parameters
    ----------
    path : str
        String prefix for both input globbing and output files.  Its last
        character is stripped before taking the basename, so it presumably
        ends with a path separator -- confirm against callers.
    neut_mut
        Not used by this function.
    n_pops : int
        Number of populations; all pairs are used for Dxy and Fst.
    n_sims : int
        Number of simulation replicates per entry of ``T``.
    T : sequence
        Values whose ``str()`` forms the start of each tree file name.
    win_size
        Window size passed to the windowed statistics.
    L
        Used as ``stop`` for the windows; ``int(L / win_size)`` is the
        number of windows stored per statistic.
    N
        Forwarded to ``ac_from_ts``.

    Returns
    -------
    None
        Results are written to ``path + basename + "_pis.pkl"``,
        ``"_tajd.pkl"``, ``"_div.pkl"`` and ``"_fst.pkl"``.

    Raises
    ------
    AssertionError
        If the glob for a (T, simulation) pair does not match exactly one file.
    """
    foname = os.path.basename(path[:-1])
    print(("Base filename:" + foname), flush=True)

    combs = list(itertools.combinations(np.arange(n_pops), 2))
    n_windows = int(L / win_size)
    pis = np.zeros((len(T), n_sims, n_pops, n_windows))
    div = np.zeros((len(T), n_sims, len(combs), n_windows))
    fst = np.zeros((len(T), n_sims, len(combs), n_windows))
    tajd = np.zeros((len(T), n_sims, n_pops, n_windows))

    for t, split_time in enumerate(T):
        for i in range(n_sims):
            files = glob(path + str(split_time) + "N_sim_" + str(i) +
                         "_RAND_*[0-9]_overlaid.trees")
            print(files)
            # Kept as an assert so the exception type seen by callers is unchanged.
            assert (len(files) == 1), str(
                len(files)) + " file(s) found with glob T: " + str(
                    split_time) + " sim:" + str(i)
            filename = files[0]
            print(filename)
            ts = pyslim.load(filename).simplify()

            # Per-population statistics.
            s1 = timer()
            acs, pos = ac_from_ts(ts, n_pops, N)
            for j in range(n_pops):
                pi, *_ = allel.windowed_diversity(
                    pos, acs[j], size=win_size, start=1, stop=L)
                pis[t, i, j, :] = pi
                D, *_ = allel.windowed_tajima_d(
                    pos, acs[j], size=win_size, start=1, stop=L)
                tajd[t, i, j, :] = D
            s2 = timer()
            print(("Calculating windowed Pi/TajD... Time elapsed (min):" +
                   str(round((s2 - s1) / 60, 3))), flush=True)

            # Per-pair statistics.
            s1 = timer()
            for k, (pop_a, pop_b) in enumerate(combs):
                dxy, *_ = allel.windowed_divergence(
                    pos, acs[pop_a], acs[pop_b],
                    size=win_size, start=1, stop=L)
                div[t, i, k, :] = dxy
                fstat, *_ = allel.windowed_hudson_fst(
                    pos, acs[pop_a], acs[pop_b],
                    size=win_size, start=1, stop=L)
                fst[t, i, k, :] = fstat
            s2 = timer()
            print(("Calculating windowed Dxy and Fst... Time elapsed (min):" +
                   str(round((s2 - s1) / 60, 3))), flush=True)

    s1 = timer()
    print((pis.shape), flush=True)
    print((tajd.shape), flush=True)
    print((div.shape), flush=True)

    # ``with`` guarantees each file is closed even if pickling fails.
    outputs = (("_pis.pkl", pis), ("_tajd.pkl", tajd),
               ("_div.pkl", div), ("_fst.pkl", fst))
    for suffix, stat in outputs:
        with open(path + foname + suffix, 'wb') as output:
            pickle.dump(stat, output)

    # Disabled landscape plot; note the hard-coded indices 0 and 9 into
    # the first axis of ``pis``.
    if (0):
        plt.subplot(2, 1, 1)
        plt.plot(np.transpose(pis[0, 0, :]), "-")
        plt.title('0N after split')
        plt.ylabel('Pi')
        plt.subplot(2, 1, 2)
        plt.plot(np.transpose(pis[9, 0, :]), "-")
        plt.title('10N after split')
        plt.xlabel('Window')
        plt.ylabel('Pi')
        plt.tight_layout()
        plt.savefig(path + foname + '_landscape.pdf')
        plt.close()
    s2 = timer()
    print(("Saving stats and plots to file... Time elapsed (min):" +
           str(round((s2 - s1) / 60, 3))), flush=True)