Python jackknife Examples, allel.stats.misc.jackknife Python Examples

Example #1

0

Show file

File: fst.py Project: yangmqglobe/scikit-allel

def average_weir_cockerham_fst(g, subpops, blen, max_allele=None):
    """Compute average Fst together with a block-jackknife standard error.

    The per-variant values ``a``, ``b`` and ``c`` returned by
    `weir_cockerham_fst` are combined as ``sum(a) / (sum(a) + sum(b) +
    sum(c))``, with NaNs ignored when summing.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, ploidy)
        Genotype array.
    subpops : sequence of sequences of ints
        Sample indices for each subpopulation.
    blen : int
        Block size (number of variants).
    max_allele : int, optional
        The highest allele index to consider; forwarded unchanged to
        `weir_cockerham_fst`.

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    def _blockwise_nansum(values):
        # NaN-ignoring sum of `values` over windows of `blen` variants
        return moving_statistic(values, statistic=np.nansum, size=blen)

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife: total numerator / total
        # denominator
        return np.sum(n) / np.sum(d)

    # per-variant values
    a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele)

    # overall estimate from the NaN-ignoring total of each term
    a_sum, b_sum, c_sum = (np.nansum(term) for term in (a, b, c))
    fst = a_sum / (a_sum + b_sum + c_sum)

    # numerator and denominator accumulated within each block
    num_bsum = _blockwise_nansum(a)
    den_bsum = _blockwise_nansum(a + b + c)

    # value of the statistic in each block
    vb = num_bsum / den_bsum

    # standard error and resampled values; the first value returned by
    # the jackknife is not used here
    _, se, vj = jackknife((num_bsum, den_bsum), statistic=_ratio_of_sums)

    return fst, se, vb, vj

Example #2

0

Show file

File: fst.py Project: podpearson/scikit-allel

def blockwise_weir_cockerham_fst(g, subpops, blen, max_allele=None):
    """Average Fst over all variants, with block-jackknife standard error.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, ploidy)
        Genotype array.
    subpops : sequence of sequences of ints
        Sample indices for each subpopulation.
    blen : int
        Block size (number of variants).
    max_allele : int, optional
        The highest allele index to consider; passed straight through to
        `weir_cockerham_fst`.

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data, computed as
        ``sum(a) / (sum(a) + sum(b) + sum(c))`` with NaNs ignored.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    # per-variant values
    a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele)

    # overall estimate, built from NaN-ignoring totals
    a_sum = np.nansum(a)
    b_sum = np.nansum(b)
    c_sum = np.nansum(c)
    denominator_total = a_sum + b_sum + c_sum
    fst = a_sum / denominator_total

    # keyword arguments shared by both block-sum calls
    block_kwargs = dict(statistic=np.nansum, size=blen)
    num_bsum = moving_statistic(a, **block_kwargs)
    den_bsum = moving_statistic(a + b + c, **block_kwargs)

    # value of the statistic in each block
    vb = num_bsum / den_bsum

    def _ratio_of_sums(n, d):
        # statistic evaluated by the jackknife on the block sums
        return np.sum(n) / np.sum(d)

    # only the standard error and resampled values are kept
    jackknife_result = jackknife((num_bsum, den_bsum),
                                 statistic=_ratio_of_sums)
    _, se, vj = jackknife_result

    return fst, se, vb, vj

Example #3

0

Show file

File: fst.py Project: yangmqglobe/scikit-allel

def average_patterson_fst(ac1, ac2, blen):
    """Compute average Fst between two populations, with a block-jackknife
    standard error.

    The statistic is the NaN-ignoring sum of the per-variant numerators
    returned by `patterson_fst` divided by the NaN-ignoring sum of the
    per-variant denominators.

    Parameters
    ----------
    ac1 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the first population.
    ac2 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the second population.
    blen : int
        Block size (number of variants).

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    def _blockwise_nansum(values):
        # NaN-ignoring sum of `values` over windows of `blen` variants
        return moving_statistic(values, statistic=np.nansum, size=blen)

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    num, den = patterson_fst(ac1, ac2)

    # overall estimate
    num_total = np.nansum(num)
    den_total = np.nansum(den)
    fst = num_total / den_total

    # block sums and the statistic within each block
    num_bsum = _blockwise_nansum(num)
    den_bsum = _blockwise_nansum(den)
    vb = num_bsum / den_bsum

    # standard error and resampled values; first returned value is unused
    _, se, vj = jackknife((num_bsum, den_bsum), statistic=_ratio_of_sums)

    return fst, se, vb, vj

Example #4

0

Show file

File: fst.py Project: podpearson/scikit-allel

def blockwise_patterson_fst(ac1, ac2, blen):
    """Average Fst between two populations, with block-jackknife standard
    error.

    Parameters
    ----------
    ac1 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the first population.
    ac2 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the second population.
    blen : int
        Block size (number of variants).

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data: NaN-ignoring sum
        of per-variant numerators over NaN-ignoring sum of per-variant
        denominators.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    # per-variant numerator and denominator
    num, den = patterson_fst(ac1, ac2)

    # overall estimate
    fst = np.nansum(num) / np.nansum(den)

    # keyword arguments shared by both block-sum calls
    block_kwargs = dict(statistic=np.nansum, size=blen)
    num_bsum = moving_statistic(num, **block_kwargs)
    den_bsum = moving_statistic(den, **block_kwargs)

    # value of the statistic in each block
    vb = num_bsum / den_bsum

    def _ratio_of_sums(n, d):
        # statistic evaluated by the jackknife on the block sums
        return np.sum(n) / np.sum(d)

    # only the standard error and resampled values are kept
    jackknife_result = jackknife((num_bsum, den_bsum),
                                 statistic=_ratio_of_sums)
    _, se, vj = jackknife_result

    return fst, se, vb, vj

Example #5

0

Show file

File: admixture.py Project: yangmqglobe/scikit-allel

def average_patterson_d(aca, acb, acc, acd, blen):
    """Compute D(A, B; C, D) with a block-jackknife standard error and
    Z-score.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.
    blen : int
        Block size (number of variants).

    Returns
    -------
    d : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_d

    """

    def _blockwise_nansum(values):
        # NaN-ignoring sum of `values` over windows of `blen` variants
        return moving_statistic(values, statistic=np.nansum, size=blen)

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    num, den = patterson_d(aca, acb, acc, acd)

    # N.B., nans can occur if any of the populations have completely missing
    # genotype calls at a variant (i.e., allele number is zero). Here we
    # assume that is rare enough to be negligible, and simply skip them
    # when summing.

    # overall estimate
    num_total = np.nansum(num)
    den_total = np.nansum(den)
    d_avg = num_total / den_total

    # block sums and the statistic within each block
    num_bsum = _blockwise_nansum(num)
    den_bsum = _blockwise_nansum(den)
    vb = num_bsum / den_bsum

    # standard error and resampled values; first returned value is unused
    _, se, vj = jackknife((num_bsum, den_bsum), statistic=_ratio_of_sums)

    # Z-score: overall estimate expressed in standard errors
    z = d_avg / se

    return d_avg, se, z, vb, vj

Example #6

0

Show file

File: admixture.py Project: yangmqglobe/scikit-allel

def average_patterson_f3(acc, aca, acb, blen, normed=True):
    """Compute F3(C; A, B) with a block-jackknife standard error and
    Z-score.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).
    blen : int
        Block size (number of variants).
    normed : bool, optional
        If True (the default), the statistic is the NaN-ignoring sum of the
        first array returned by `patterson_f3` divided by the NaN-ignoring
        sum of the second. If False, use un-normalised f3 values, i.e. the
        NaN-ignoring mean of the first array.

    Returns
    -------
    f3 : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_f3

    """

    # per-variant values
    T, B = patterson_f3(acc, aca, acb)

    # N.B., nans can occur if any of the populations have completely missing
    # genotype calls at a variant (i.e., allele number is zero). Here we
    # assume that is rare enough to be negligible.

    if normed:

        def _ratio_of_sums(t, b):
            # statistic handed to the jackknife
            return np.sum(t) / np.sum(b)

        # overall value: ratio of NaN-ignoring totals
        f3 = np.nansum(T) / np.nansum(B)

        # per-block totals, per-block ratio, then jackknife over blocks
        T_bsum = moving_statistic(T, statistic=np.nansum, size=blen)
        B_bsum = moving_statistic(B, statistic=np.nansum, size=blen)
        vb = T_bsum / B_bsum
        _, se, vj = jackknife((T_bsum, B_bsum), statistic=_ratio_of_sums)

    else:
        # overall value: NaN-ignoring mean of the un-normalised values
        f3 = np.nanmean(T)

        # per-block means, then jackknife the mean of those block values
        vb = moving_statistic(T, statistic=np.nanmean, size=blen)
        _, se, vj = jackknife(vb, statistic=np.mean)

    # Z-score: overall estimate expressed in standard errors
    z = f3 / se

    return f3, se, z, vb, vj

Example #7

0

Show file

File: admixture.py Project: oxpeter/scikit-allel

def blockwise_patterson_d(aca, acb, acc, acd, blen):
    """D(A, B; C, D) over all variants, with block-jackknife standard error
    and Z-score.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.
    blen : int
        Block size (number of variants).

    Returns
    -------
    d : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_d

    """

    # per-variant numerator and denominator
    num, den = patterson_d(aca, acb, acc, acd)

    # N.B., nans can occur if any of the populations have completely missing
    # genotype calls at a variant (i.e., allele number is zero). Here we
    # assume that is rare enough to be negligible.

    # overall estimate (NaN-ignoring totals)
    d_all = np.nansum(num) / np.nansum(den)

    # keyword arguments shared by both block-sum calls
    block_kwargs = dict(statistic=np.nansum, size=blen)
    num_bsum = moving_statistic(num, **block_kwargs)
    den_bsum = moving_statistic(den, **block_kwargs)

    # value of the statistic in each block
    vb = num_bsum / den_bsum

    def _ratio_of_sums(n, d):
        # statistic evaluated by the jackknife on the block sums
        return np.sum(n) / np.sum(d)

    # only the standard error and resampled values are kept
    jackknife_result = jackknife((num_bsum, den_bsum),
                                 statistic=_ratio_of_sums)
    _, se, vj = jackknife_result

    # Z-score: overall estimate expressed in standard errors
    z = d_all / se

    return d_all, se, z, vb, vj

Example #8

0

Show file

File: admixture.py Project: oxpeter/scikit-allel

def blockwise_patterson_f3(acc, aca, acb, blen, normed=True):
    """F3(C; A, B) over all variants, with block-jackknife standard error
    and Z-score.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).
    blen : int
        Block size (number of variants).
    normed : bool, optional
        If False, use un-normalised f3 values (NaN-ignoring mean of the
        first array returned by `patterson_f3`). Otherwise the statistic is
        the ratio of the NaN-ignoring sums of the two returned arrays.

    Returns
    -------
    f3 : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_f3

    """

    # per-variant values
    T, B = patterson_f3(acc, aca, acb)

    # N.B., nans can occur if any of the populations have completely missing
    # genotype calls at a variant (i.e., allele number is zero). Here we
    # assume that is rare enough to be negligible.

    if not normed:
        # un-normalised: overall NaN-ignoring mean, per-block NaN-ignoring
        # means, and a jackknife over the mean of the block values
        f3 = np.nanmean(T)
        vb = moving_statistic(T, statistic=np.nanmean, size=blen)
        _, se, vj = jackknife(vb, statistic=np.mean)

    else:
        # normalised: ratio of NaN-ignoring totals, overall and per block
        f3 = np.nansum(T) / np.nansum(B)
        block_kwargs = dict(statistic=np.nansum, size=blen)
        T_bsum = moving_statistic(T, **block_kwargs)
        B_bsum = moving_statistic(B, **block_kwargs)
        vb = T_bsum / B_bsum

        def _ratio_of_sums(t, b):
            # statistic evaluated by the jackknife on the block sums
            return np.sum(t) / np.sum(b)

        _, se, vj = jackknife((T_bsum, B_bsum), statistic=_ratio_of_sums)

    # Z-score: overall estimate expressed in standard errors
    z = f3 / se

    return f3, se, z, vb, vj