def average_weir_cockerham_fst(g, subpops, blen, max_allele=None): """Estimate average Fst and standard error using the block-jackknife. Parameters ---------- g : array_like, int, shape (n_variants, n_samples, ploidy) Genotype array. subpops : sequence of sequences of ints Sample indices for each subpopulation. blen : int Block size (number of variants). max_allele : int, optional The highest allele index to consider. Returns ------- fst : float Estimated value of the statistic using all data. se : float Estimated standard error. vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. """ # calculate per-variant values a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele) # calculate overall estimate a_sum = np.nansum(a) b_sum = np.nansum(b) c_sum = np.nansum(c) fst = a_sum / (a_sum + b_sum + c_sum) # compute the numerator and denominator within each block num_bsum = moving_statistic(a, statistic=np.nansum, size=blen) den_bsum = moving_statistic(a + b + c, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) return fst, se, vb, vj
def blockwise_weir_cockerham_fst(g, subpops, blen, max_allele=None): """Estimate average Fst and standard error using the block-jackknife. Parameters ---------- g : array_like, int, shape (n_variants, n_samples, ploidy) Genotype array. subpops : sequence of sequences of ints Sample indices for each subpopulation. blen : int Block size (number of variants). max_allele : int, optional The highest allele index to consider. Returns ------- fst : float Estimated value of the statistic using all data. se : float Estimated standard error. vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. """ # calculate per-variant values a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele) # calculate overall estimate a_sum = np.nansum(a) b_sum = np.nansum(b) c_sum = np.nansum(c) fst = a_sum / (a_sum + b_sum + c_sum) # compute the numerator and denominator within each block num_bsum = moving_statistic(a, statistic=np.nansum, size=blen) den_bsum = moving_statistic(a + b + c, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) return fst, se, vb, vj
def average_patterson_fst(ac1, ac2, blen): """Estimate average Fst between two populations and standard error using the block-jackknife. Parameters ---------- ac1 : array_like, int, shape (n_variants, n_alleles) Allele counts array from the first population. ac2 : array_like, int, shape (n_variants, n_alleles) Allele counts array from the second population. blen : int Block size (number of variants). Returns ------- fst : float Estimated value of the statistic using all data. se : float Estimated standard error. vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. """ # calculate per-variant values num, den = patterson_fst(ac1, ac2) # calculate overall estimate fst = np.nansum(num) / np.nansum(den) # compute the numerator and denominator within each block num_bsum = moving_statistic(num, statistic=np.nansum, size=blen) den_bsum = moving_statistic(den, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) return fst, se, vb, vj
def blockwise_patterson_fst(ac1, ac2, blen): """Estimate average Fst between two populations and standard error using the block-jackknife. Parameters ---------- ac1 : array_like, int, shape (n_variants, n_alleles) Allele counts array from the first population. ac2 : array_like, int, shape (n_variants, n_alleles) Allele counts array from the second population. blen : int Block size (number of variants). Returns ------- fst : float Estimated value of the statistic using all data. se : float Estimated standard error. vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. """ # calculate per-variant values num, den = patterson_fst(ac1, ac2) # calculate overall estimate fst = np.nansum(num) / np.nansum(den) # compute the numerator and denominator within each block num_bsum = moving_statistic(num, statistic=np.nansum, size=blen) den_bsum = moving_statistic(den, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) return fst, se, vb, vj
def average_patterson_d(aca, acb, acc, acd, blen): """Estimate D(A, B; C, D) and standard error using the block-jackknife. Parameters ---------- aca : array_like, int, shape (n_variants, 2), Allele counts for population A. acb : array_like, int, shape (n_variants, 2) Allele counts for population B. acc : array_like, int, shape (n_variants, 2) Allele counts for population C. acd : array_like, int, shape (n_variants, 2) Allele counts for population D. blen : int Block size (number of variants). Returns ------- d : float Estimated value of the statistic using all data. se : float Estimated standard error. z : float Z-score (number of standard errors from zero). vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. Notes ----- See Patterson (2012), main text and Appendix A. See Also -------- allel.stats.admixture.patterson_d """ # calculate per-variant values num, den = patterson_d(aca, acb, acc, acd) # N.B., nans can occur if any of the populations have completely missing # genotype calls at a variant (i.e., allele number is zero). Here we # assume that is rare enough to be negligible. # calculate overall estimate d_avg = np.nansum(num) / np.nansum(den) # compute the numerator and denominator within each block num_bsum = moving_statistic(num, statistic=np.nansum, size=blen) den_bsum = moving_statistic(den, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) # compute Z score z = d_avg / se return d_avg, se, z, vb, vj
def average_patterson_f3(acc, aca, acb, blen, normed=True): """Estimate F3(C; A, B) and standard error using the block-jackknife. Parameters ---------- acc : array_like, int, shape (n_variants, 2) Allele counts for the test population (C). aca : array_like, int, shape (n_variants, 2) Allele counts for the first source population (A). acb : array_like, int, shape (n_variants, 2) Allele counts for the second source population (B). blen : int Block size (number of variants). normed : bool, optional If False, use un-normalised f3 values. Returns ------- f3 : float Estimated value of the statistic using all data. se : float Estimated standard error. z : float Z-score (number of standard errors from zero). vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. Notes ----- See Patterson (2012), main text and Appendix A. See Also -------- allel.stats.admixture.patterson_f3 """ # calculate per-variant values T, B = patterson_f3(acc, aca, acb) # N.B., nans can occur if any of the populations have completely missing # genotype calls at a variant (i.e., allele number is zero). Here we # assume that is rare enough to be negligible. # calculate overall value of statistic if normed: f3 = np.nansum(T) / np.nansum(B) else: f3 = np.nanmean(T) # calculate value of statistic within each block if normed: T_bsum = moving_statistic(T, statistic=np.nansum, size=blen) B_bsum = moving_statistic(B, statistic=np.nansum, size=blen) vb = T_bsum / B_bsum _, se, vj = jackknife((T_bsum, B_bsum), statistic=lambda t, b: np.sum(t) / np.sum(b)) else: vb = moving_statistic(T, statistic=np.nanmean, size=blen) _, se, vj = jackknife(vb, statistic=np.mean) # compute Z score z = f3 / se return f3, se, z, vb, vj
def blockwise_patterson_d(aca, acb, acc, acd, blen): """Estimate D(A, B; C, D) and standard error using the block-jackknife. Parameters ---------- aca : array_like, int, shape (n_variants, 2), Allele counts for population A. acb : array_like, int, shape (n_variants, 2) Allele counts for population B. acc : array_like, int, shape (n_variants, 2) Allele counts for population C. acd : array_like, int, shape (n_variants, 2) Allele counts for population D. blen : int Block size (number of variants). Returns ------- d : float Estimated value of the statistic using all data. se : float Estimated standard error. z : float Z-score (number of standard errors from zero). vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. Notes ----- See Patterson (2012), main text and Appendix A. See Also -------- allel.stats.admixture.patterson_d """ # calculate per-variant values num, den = patterson_d(aca, acb, acc, acd) # N.B., nans can occur if any of the populations have completely missing # genotype calls at a variant (i.e., allele number is zero). Here we # assume that is rare enough to be negligible. # calculate overall estimate d = np.nansum(num) / np.nansum(den) # compute the numerator and denominator within each block num_bsum = moving_statistic(num, statistic=np.nansum, size=blen) den_bsum = moving_statistic(den, statistic=np.nansum, size=blen) # calculate the statistic values in each block vb = num_bsum / den_bsum # estimate standard error _, se, vj = jackknife((num_bsum, den_bsum), statistic=lambda n, d: np.sum(n) / np.sum(d)) # compute Z score z = d / se return d, se, z, vb, vj
def blockwise_patterson_f3(acc, aca, acb, blen, normed=True): """Estimate F3(C; A, B) and standard error using the block-jackknife. Parameters ---------- acc : array_like, int, shape (n_variants, 2) Allele counts for the test population (C). aca : array_like, int, shape (n_variants, 2) Allele counts for the first source population (A). acb : array_like, int, shape (n_variants, 2) Allele counts for the second source population (B). blen : int Block size (number of variants). normed : bool, optional If False, use un-normalised f3 values. Returns ------- f3 : float Estimated value of the statistic using all data. se : float Estimated standard error. z : float Z-score (number of standard errors from zero). vb : ndarray, float, shape (n_blocks,) Value of the statistic in each block. vj : ndarray, float, shape (n_blocks,) Values of the statistic from block-jackknife resampling. Notes ----- See Patterson (2012), main text and Appendix A. See Also -------- allel.stats.admixture.patterson_f3 """ # calculate per-variant values T, B = patterson_f3(acc, aca, acb) # N.B., nans can occur if any of the populations have completely missing # genotype calls at a variant (i.e., allele number is zero). Here we # assume that is rare enough to be negligible. # calculate overall value of statistic if normed: f3 = np.nansum(T) / np.nansum(B) else: f3 = np.nanmean(T) # calculate value of statistic within each block if normed: T_bsum = moving_statistic(T, statistic=np.nansum, size=blen) B_bsum = moving_statistic(B, statistic=np.nansum, size=blen) vb = T_bsum / B_bsum _, se, vj = jackknife((T_bsum, B_bsum), statistic=lambda t, b: np.sum(t) / np.sum(b)) else: vb = moving_statistic(T, statistic=np.nanmean, size=blen) _, se, vj = jackknife(vb, statistic=np.mean) # compute Z score z = f3 / se return f3, se, z, vb, vj