def test_sfs_scaled(self):
    """Check ``allel.sfs_scaled`` for list input and several integer dtypes."""
    counts = [0, 1, 2, 1]
    expected = [0, 2, 2]

    # Plain Python list input.
    aeq(expected, allel.sfs_scaled(counts))

    # The same counts as NumPy arrays of unsigned/signed 16- and 64-bit ints
    # must give the same spectrum.
    for dtype in ('u2', 'i2', 'u8', 'i8'):
        typed_counts = np.asarray(counts, dtype=dtype)
        aeq(expected, allel.sfs_scaled(typed_counts))
def test_sfs_scaled():
    """Check ``allel.sfs_scaled`` with and without an explicit ``n``."""
    counts = [0, 1, 2, 1]

    # The list itself, then the same values under several integer dtypes,
    # must all produce the same spectrum.
    candidates = [counts]
    candidates.extend(
        np.asarray(counts, dtype=dtype) for dtype in ('u2', 'i2', 'u8', 'i8')
    )
    for candidate in candidates:
        assert_array_equal([0, 2, 2], allel.sfs_scaled(candidate))

    # explicitly provide number of chromosomes
    assert_array_equal([0, 2, 2, 0], allel.sfs_scaled(counts, n=3))

    # n=1 with these counts must be rejected.
    with pytest.raises(ValueError):
        allel.sfs_scaled(counts, n=1)
def sfs_plot(c, ac_subpops, save=True, fold=True, scale=True):
    """Compute and plot one site frequency spectrum per population.

    For every population in ``ac_subpops`` the allele counts are restricted
    to sites that are segregating and whose ``max_allele()`` equals 1, the
    requested spectrum is computed, and it is drawn on one shared axes.

    Parameters
    ----------
    c : object
        Label formatted into the plot title and the output file name
        (presumably a chromosome/contig name -- confirm against callers).
    ac_subpops : mapping
        Population name -> allele counts array exposing ``is_segregating()``,
        ``max_allele()`` and ``compress()``.
    save : bool
        If true, write the figure to ``ScaledSFS-<c>.pdf``.
    fold : bool
        If true, compute the folded spectrum from the full counts array;
        otherwise compute the spectrum from column 1 of the counts.
    scale : bool
        If true, use the scaled variant of the spectrum.

    Returns
    -------
    dict
        Population name -> computed spectrum.

    Notes
    -----
    * Filter on segregating sites when only a subset of populations is used.
    * Only biallelic sites should be used when more than one alternate
      allele is present, e.g.::

          is_biallelic_01 = ac_seg['all'].is_biallelic_01()[:]
          ac1 = ac_seg['BFM'].compress(is_biallelic_01, axis=0)[:, :2]
          ac2 = ac_seg['AOM'].compress(is_biallelic_01, axis=0)[:, :2]
    """
    # Pick the spectrum function together with the plot function that matches
    # it. Previously every mode was drawn with plot_sfs_folded_scaled, so
    # unfolded or unscaled spectra were plotted and labelled as the wrong kind.
    if fold:
        if scale:
            compute, draw = allel.sfs_folded_scaled, allel.stats.plot_sfs_folded_scaled
        else:
            compute, draw = allel.sfs_folded, allel.stats.plot_sfs_folded
    else:
        if scale:
            compute, draw = allel.sfs_scaled, allel.stats.plot_sfs_scaled
        else:
            compute, draw = allel.sfs, allel.stats.plot_sfs

    title_kind = {
        (True, True): 'Scaled folded',
        (True, False): 'Folded',
        (False, True): 'Scaled unfolded',
        (False, False): 'Unfolded',
    }[(bool(fold), bool(scale))]

    sfsdict = {}
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.despine(ax=ax, offset=10)

    for pop, acu in ac_subpops.items():
        # Keep segregating sites whose highest observed allele index is 1.
        flt = acu.is_segregating() & (acu.max_allele() == 1)
        print('SFS : retaining', np.count_nonzero(flt), 'SNPs')
        ac1 = allel.AlleleCountsArray(acu.compress(flt, axis=0))

        # Folded spectra take the whole counts array; unfolded spectra take
        # the counts in column 1 only.
        sfsdict[pop] = compute(ac1) if fold else compute(ac1[:, 1])

        # n is the largest per-site total allele count among retained sites.
        draw(sfsdict[pop], ax=ax, label=pop, n=ac1.sum(axis=1).max())

    ax.legend()
    ax.set_title('{} {} site frequency spectra'.format(c, title_kind))
    if fold:
        ax.set_xlabel('minor allele frequency')
    # For unfolded spectra the x label set by the plot function is kept, since
    # "minor allele frequency" would misdescribe the axis.

    if save:
        # File name kept as-is for every mode so existing consumers still
        # find the output where they expect it.
        fig.savefig("ScaledSFS-{}.pdf".format(c), bbox_inches='tight')
    return sfsdict
def sfs(haplotype, ac, nindiv=None, folded=False):
    """Tabulate the site frequency spectrum of a SNP matrix as a DataFrame.

    Parameters
    ----------
    haplotype : array-like
        Only ``haplotype.shape[1]`` is read, and only when ``nindiv`` is
        not supplied.
    ac : allele counts array
        Passed whole to the folded spectrum functions; for the unfolded
        spectrum only ``ac.T[1]`` is used.
    nindiv : int, optional
        Upper bound (exclusive) of the ``N_indiv`` classes reported and the
        denominator of ``freq_indiv``. Defaults to ``haplotype.shape[1]``.
    folded : bool
        If true, use the folded spectrum; otherwise the unfolded one.

    Returns
    -------
    pandas.DataFrame
        One row per ``N_indiv`` in ``1 .. nindiv - 1`` with columns
        ``N_indiv``, ``count_SNP`` (spectrum), ``i_xi`` (scaled spectrum)
        and ``freq_indiv`` (``N_indiv / nindiv``). Classes absent from the
        computed spectrum are filled with 0.
    """
    if nindiv is None:
        nindiv = haplotype.shape[1]

    # Compute the plain and scaled spectra for the requested variant.
    if folded:
        counts = allel.sfs_folded(ac)
        scaled = allel.sfs_folded_scaled(ac)
    else:
        column_one = ac.T[1]
        counts = allel.sfs(column_one)
        scaled = allel.sfs_scaled(column_one)

    # The positional index of the spectrum becomes the N_indiv column.
    df_sfs = pd.DataFrame(counts, columns=["count_SNP"])
    df_sfs["i_xi"] = scaled
    df_sfs.index.name = "N_indiv"
    df_sfs.reset_index(inplace=True)

    # Right-merge onto 1 .. nindiv - 1: drops class 0 and anything at or
    # beyond nindiv, and adds zero rows for classes the spectrum lacks.
    tmp_df = pd.DataFrame({"N_indiv": range(1, nindiv)})
    # NOTE(review): astype(int) truncates any fractional values in i_xi;
    # folded scaled spectra may be non-integer -- confirm this is intended.
    df_sfs = df_sfs.merge(tmp_df, on="N_indiv", how="right").fillna(0).astype(int)

    df_sfs["freq_indiv"] = df_sfs.N_indiv / nindiv
    return df_sfs