def patterson_f3(acc, aca, acb):
    """Unbiased estimator for F3(C; A, B), the three-population test for
    admixture in population C.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).

    Returns
    -------
    T : ndarray, float, shape (n_variants,)
        Un-normalized f3 estimates per variant.
    B : ndarray, float, shape (n_variants,)
        Estimates for heterozygosity in population C.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f3 statistics, ignore the `B` return value.

    To compute the f3* statistic, which is normalized by heterozygosity in
    population C to remove numerical dependence on the allele frequency
    spectrum, compute ``np.sum(T) / np.sum(B)``.

    Inputs are checked with ``assert``: each array must have exactly two
    columns (biallelic variants only).

    """

    # Wrap and validate each input in turn (A, then B, then C) so the first
    # offending argument is the one reported.
    wrapped = []
    for counts in (aca, acb, acc):
        counts = AlleleCountsArray(counts, copy=False)
        assert counts.shape[1] == 2, 'only biallelic variants supported'
        wrapped.append(counts)
    aca, acb, acc = wrapped
    check_dim0_aligned(aca, acb, acc)

    # Test population: number of alleles called per variant and the
    # heterozygosity estimate used for the bias correction.
    n_c = acc.sum(axis=1)
    het_c = h_hat(acc)

    # Alternate-allele sample frequencies (second column) for A, B and C.
    freq_a, freq_b, freq_c = (
        counts.to_frequencies()[:, 1] for counts in (aca, acb, acc)
    )

    # Per-variant numerator with the het_c / n_c correction term, and the
    # heterozygosity-based normaliser.
    T = (freq_c - freq_a) * (freq_c - freq_b) - het_c / n_c
    B = 2 * het_c

    return T, B
def patterson_d(aca, acb, acc, acd):
    """Unbiased estimator for D(A, B; C, D), the normalised four-population
    test for admixture between (A or B) and (C or D), also known as the
    "ABBA BABA" test.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.

    Returns
    -------
    num : ndarray, float, shape (n_variants,)
        Numerator (un-normalised f4 estimates).
    den : ndarray, float, shape (n_variants,)
        Denominator.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f4 statistics, ignore the `den` return value.

    Inputs are checked with ``assert``: each array must have exactly two
    columns (biallelic variants only).

    """

    # Wrap and validate the four inputs in argument order.
    wrapped = []
    for counts in (aca, acb, acc, acd):
        counts = AlleleCountsArray(counts, copy=False)
        assert counts.shape[1] == 2, 'only biallelic variants supported'
        wrapped.append(counts)
    aca, acb, acc, acd = wrapped
    check_dim0_aligned(aca, acb, acc, acd)

    # Alternate-allele sample frequencies (second column) per population.
    freq_a, freq_b, freq_c, freq_d = (
        counts.to_frequencies()[:, 1] for counts in (aca, acb, acc, acd)
    )

    # Numerator: product of the frequency differences within each pair.
    num = (freq_a - freq_b) * (freq_c - freq_d)

    # Denominator: product of the per-pair terms p + q - 2pq.
    pair_ab = freq_a + freq_b - (2 * freq_a * freq_b)
    pair_cd = freq_c + freq_d - (2 * freq_c * freq_d)
    den = pair_ab * pair_cd

    return num, den
def patterson_f2(aca, acb):
    """Unbiased estimator for F2(A, B), the branch length between
    populations A and B.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.

    Returns
    -------
    f2 : ndarray, float, shape (n_variants,)

    Notes
    -----
    See Patterson (2012), Appendix A.

    Inputs are checked with ``assert``: each array must have exactly two
    columns (biallelic variants only).

    """
    # NOTE(review): this file contains a second, equivalent definition of
    # patterson_f2 right after this one; the later definition is the one
    # bound at import time. Consider removing one of them.

    # Wrap and validate population A, then population B.
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb)

    # Number of alleles called per variant in each population.
    n_a = aca.sum(axis=1)
    n_b = acb.sum(axis=1)

    # Heterozygosity estimates feeding the bias-correction terms.
    het_a = h_hat(aca)
    het_b = h_hat(acb)

    # Alternate-allele sample frequencies (second column).
    freq_a = aca.to_frequencies()[:, 1]
    freq_b = acb.to_frequencies()[:, 1]

    # Squared frequency difference minus one correction term per population.
    delta = freq_a - freq_b
    f2 = (delta ** 2) - (het_a / n_a) - (het_b / n_b)

    return f2
def patterson_f2(aca, acb):
    """Unbiased estimator for F2(A, B), the branch length between
    populations A and B.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.

    Returns
    -------
    f2 : ndarray, float, shape (n_variants,)

    Notes
    -----
    See Patterson (2012), Appendix A.

    Inputs are checked with ``assert``: each array must have exactly two
    columns (biallelic variants only).

    """
    # NOTE(review): an equivalent definition of patterson_f2 appears
    # immediately before this one in the file; being later, this definition
    # is the one bound at import time. Consider removing the duplicate.

    # Wrap and validate the inputs in argument order (A, then B).
    wrapped = []
    for counts in (aca, acb):
        counts = AlleleCountsArray(counts, copy=False)
        assert counts.shape[1] == 2, 'only biallelic variants supported'
        wrapped.append(counts)
    aca, acb = wrapped
    check_dim0_aligned(aca, acb)

    # Allele numbers per variant, then heterozygosity estimates.
    num_a, num_b = aca.sum(axis=1), acb.sum(axis=1)
    het_a, het_b = h_hat(aca), h_hat(acb)

    # Alternate-allele sample frequencies (second column).
    p_a, p_b = (counts.to_frequencies()[:, 1] for counts in (aca, acb))

    # Squared frequency difference with a correction term per population.
    return ((p_a - p_b) ** 2) - (het_a / num_a) - (het_b / num_b)