def patterson_f3(acc, aca, acb):
    """Unbiased estimator for F3(C; A, B), the three-population test for
    admixture in population C.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).

    Returns
    -------
    T : ndarray, float, shape (n_variants,)
        Un-normalized f3 estimates per variant.
    B : ndarray, float, shape (n_variants,)
        Estimates for heterozygosity in population C.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f3 statistics, ignore the `B` return value.

    To compute the f3* statistic, which is normalized by heterozygosity in
    population C to remove numerical dependence on the allele frequency
    spectrum, compute ``np.sum(T) / np.sum(B)``.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    acc = AlleleCountsArray(acc, copy=False)
    assert acc.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb, acc)

    # compute allele number and heterozygosity in test population
    sc = acc.sum(axis=1)
    hc = h_hat(acc)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]
    c = acc.to_frequencies()[:, 1]

    # compute estimator
    T = ((c - a) * (c - b)) - (hc / sc)
    B = 2 * hc

    return T, B
Exemple #2
0
def patterson_f3(acc, aca, acb):
    """Unbiased estimator for F3(C; A, B), the three-population test for
    admixture in population C.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).

    Returns
    -------
    T : ndarray, float, shape (n_variants,)
        Un-normalized f3 estimates per variant.
    B : ndarray, float, shape (n_variants,)
        Estimates for heterozygosity in population C.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f3 statistics, ignore the `B` return value.

    To compute the f3* statistic, which is normalized by heterozygosity in
    population C to remove numerical dependence on the allele frequency
    spectrum, compute ``np.sum(T) / np.sum(B)``.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    acc = AlleleCountsArray(acc, copy=False)
    assert acc.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb, acc)

    # compute allele number and heterozygosity in test population
    sc = acc.sum(axis=1)
    hc = h_hat(acc)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]
    c = acc.to_frequencies()[:, 1]

    # compute estimator
    T = ((c - a) * (c - b)) - (hc / sc)
    B = 2 * hc

    return T, B
def patterson_d(aca, acb, acc, acd):
    """Unbiased estimator for D(A, B; C, D), the normalised four-population
    test for admixture between (A or B) and (C or D), also known as the
    "ABBA BABA" test.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2),
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.

    Returns
    -------
    num : ndarray, float, shape (n_variants,)
        Numerator (un-normalised f4 estimates).
    den : ndarray, float, shape (n_variants,)
        Denominator.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f4 statistics, ignore the `den` return value.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    acc = AlleleCountsArray(acc, copy=False)
    assert acc.shape[1] == 2, 'only biallelic variants supported'
    acd = AlleleCountsArray(acd, copy=False)
    assert acd.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb, acc, acd)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]
    c = acc.to_frequencies()[:, 1]
    d = acd.to_frequencies()[:, 1]

    # compute estimator
    num = (a - b) * (c - d)
    den = (a + b - (2 * a * b)) * (c + d - (2 * c * d))

    return num, den
Exemple #4
0
def patterson_d(aca, acb, acc, acd):
    """Unbiased estimator for D(A, B; C, D), the normalised four-population
    test for admixture between (A or B) and (C or D), also known as the
    "ABBA BABA" test.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2),
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.

    Returns
    -------
    num : ndarray, float, shape (n_variants,)
        Numerator (un-normalised f4 estimates).
    den : ndarray, float, shape (n_variants,)
        Denominator.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    For un-normalized f4 statistics, ignore the `den` return value.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    acc = AlleleCountsArray(acc, copy=False)
    assert acc.shape[1] == 2, 'only biallelic variants supported'
    acd = AlleleCountsArray(acd, copy=False)
    assert acd.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb, acc, acd)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]
    c = acc.to_frequencies()[:, 1]
    d = acd.to_frequencies()[:, 1]

    # compute estimator
    num = (a - b) * (c - d)
    den = (a + b - (2 * a * b)) * (c + d - (2 * c * d))

    return num, den
def patterson_f2(aca, acb):
    """Unbiased estimator for F2(A, B), the branch length between populations
    A and B.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.

    Returns
    -------
    f2 : ndarray, float, shape (n_variants,)

    Notes
    -----
    See Patterson (2012), Appendix A.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb)

    # compute allele numbers
    sa = aca.sum(axis=1)
    sb = acb.sum(axis=1)

    # compute heterozygosities
    ha = h_hat(aca)
    hb = h_hat(acb)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]

    # compute estimator
    x = ((a - b)**2) - (ha / sa) - (hb / sb)

    return x
Exemple #6
0
def patterson_f2(aca, acb):
    """Unbiased estimator for F2(A, B), the branch length between populations
    A and B.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.

    Returns
    -------
    f2 : ndarray, float, shape (n_variants,)

    Notes
    -----
    See Patterson (2012), Appendix A.

    """

    # check inputs
    aca = AlleleCountsArray(aca, copy=False)
    assert aca.shape[1] == 2, 'only biallelic variants supported'
    acb = AlleleCountsArray(acb, copy=False)
    assert acb.shape[1] == 2, 'only biallelic variants supported'
    check_dim0_aligned(aca, acb)

    # compute allele numbers
    sa = aca.sum(axis=1)
    sb = acb.sum(axis=1)

    # compute heterozygosities
    ha = h_hat(aca)
    hb = h_hat(acb)

    # compute sample frequencies for the alternate allele
    a = aca.to_frequencies()[:, 1]
    b = acb.to_frequencies()[:, 1]

    # compute estimator
    x = ((a - b) ** 2) - (ha / sa) - (hb / sb)

    return x