예제 #1
0
def haplotype_diversity(h):
    """Compute the haplotype diversity of a set of haplotypes.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.

    Returns
    -------
    hd : float
        Haplotype diversity: one minus the sum of squared distinct
        haplotype frequencies, multiplied by ``n / (n - 1)`` where ``n``
        is the number of haplotypes.

    """

    # wrap the input (copy=False) to get access to the haplotype helpers
    haplotypes = HaplotypeArray(h, copy=False)

    # sample size, needed for the n / (n - 1) scaling below
    n_hap = haplotypes.n_haplotypes

    # frequency of every distinct haplotype
    freqs = haplotypes.distinct_frequencies()

    # sum of squared haplotype frequencies
    sum_sq = np.sum(freqs ** 2)

    # NOTE: when n_hap == 1 the denominator is zero; no special handling
    # is applied here.
    return (1 - sum_sq) * n_hap / (n_hap - 1)
예제 #2
0
def haplotype_diversity(h):
    """Return the haplotype diversity estimate for a haplotype array.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.

    Returns
    -------
    hd : float
        Haplotype diversity, computed as
        ``(1 - sum(f ** 2)) * n / (n - 1)`` with ``f`` the distinct
        haplotype frequencies and ``n`` the number of haplotypes.

    """

    # normalise the input to a HaplotypeArray (copy=False)
    hap = HaplotypeArray(h, copy=False)

    # how many haplotypes were sampled
    n = hap.n_haplotypes

    # sum of the squared distinct-haplotype frequencies
    homozygosity = np.sum(hap.distinct_frequencies() ** 2)

    # scale (1 - homozygosity) by n / (n - 1); the multiplication is done
    # before the division, as a single left-to-right expression would do
    numerator = (1 - homozygosity) * n
    denominator = n - 1

    return numerator / denominator
예제 #3
0
def garud_h(h):
    """Calculate the H1, H12, H123 and H2/H1 haplotype homozygosity
    statistics used to detect soft sweeps (Garud et al. 2015).

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.

    Returns
    -------
    h1 : float
        H1 statistic (sum of squares of haplotype frequencies).
    h12 : float
        H12 statistic (sum of squares of haplotype frequencies, with the
        two most common haplotypes pooled into a single frequency).
    h123 : float
        H123 statistic (sum of squares of haplotype frequencies, with the
        three most common haplotypes pooled into a single frequency).
    h2_h1 : float
        H2/H1 statistic, indicating the "softness" of a sweep.

    """

    # distinct haplotype frequencies of the (uncopied) input
    # NOTE(review): the slicing below assumes distinct_frequencies() returns
    # frequencies sorted from most to least common -- confirm.
    freqs = HaplotypeArray(h, copy=False).distinct_frequencies()

    # H1: plain sum of squared frequencies
    h1 = np.sum(freqs ** 2)

    # H12: the first two frequencies are pooled before squaring
    top_two = np.sum(freqs[:2])
    h12 = top_two ** 2 + np.sum(freqs[2:] ** 2)

    # H123: the first three frequencies are pooled before squaring
    top_three = np.sum(freqs[:3])
    h123 = top_three ** 2 + np.sum(freqs[3:] ** 2)

    # H2 is H1 without the contribution of the first frequency
    h2_h1 = (h1 - freqs[0] ** 2) / h1

    return h1, h12, h123, h2_h1
예제 #4
0
def garud_h(h):
    """Evaluate the Garud et al. (2015) soft-sweep statistics H1, H12,
    H123 and H2/H1 for a haplotype array.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.

    Returns
    -------
    h1 : float
        H1 statistic (sum of squares of haplotype frequencies).
    h12 : float
        H12 statistic (as H1, but the two most common haplotypes are
        combined into a single frequency first).
    h123 : float
        H123 statistic (as H1, but the three most common haplotypes are
        combined into a single frequency first).
    h2_h1 : float
        H2/H1 statistic, indicating the "softness" of a sweep.

    """

    hap = HaplotypeArray(h, copy=False)

    # NOTE(review): presumably ordered most common first, since the leading
    # entries are treated as the most common haplotypes -- confirm.
    freqs = hap.distinct_frequencies()

    def _pooled_homozygosity(k):
        # merge the first k frequencies into one class, square it, and add
        # the squared frequencies of all remaining haplotypes
        return np.sum(freqs[:k]) ** 2 + np.sum(freqs[k:] ** 2)

    # H1: no pooling at all
    h1 = np.sum(freqs ** 2)

    # H12 and H123: pool the leading two / three frequencies
    h12 = _pooled_homozygosity(2)
    h123 = _pooled_homozygosity(3)

    # H2: H1 with the leading haplotype's contribution removed
    h2 = h1 - freqs[0] ** 2

    return h1, h12, h123, h2 / h1