Beispiel #1
0
def correlation_reduce_cpu(v: ArrayLike,
                           out: ArrayLike) -> None:  # pragma: no cover
    """Corresponding "reduce" function for pearson correlation
    Parameters
    ----------
    v
        The correlation array on which pearson corrections has been
        applied on chunks
    out
        An ndarray, which is a symmetric matrix of pearson correlation

    Returns
    -------
    An ndarray, which contains the result of the calculation of the application
    of euclidean distance on all the chunks.
    """
    v = v.sum(axis=0)
    n = v[5]
    num = n * v[4] - v[0] * v[1]
    denom1 = np.sqrt(n * v[2] - v[0]**2)
    denom2 = np.sqrt(n * v[3] - v[1]**2)
    denom = denom1 * denom2
    value = np.nan
    if denom > 0:
        value = 1 - (num / denom)
    out[0] = value
Beispiel #2
0
def _divergence(ac: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
    """Generalized U-function for computing divergence.

    Parameters
    ----------
    ac
        Allele counts of shape (cohorts, alleles) containing per-cohort allele counts.
    out
        Pairwise divergence stats with shape (cohorts, cohorts), where the entry at
        (i, j) is the divergence between cohort i and cohort j.
    """
    an = ac.sum(axis=-1)
    out[:, :] = np.nan  # (cohorts, cohorts)
    n_cohorts = ac.shape[0]
    n_alleles = ac.shape[1]
    # calculate the divergence for each cohort pair
    for i in range(n_cohorts):
        for j in range(i + 1, n_cohorts):
            n_pairs = an[i] * an[j]
            if n_pairs != 0.0:
                n_same = 0
                for k in range(n_alleles):
                    n_same += ac[i, k] * ac[j, k]
                n_diff = n_pairs - n_same
                div = n_diff / n_pairs
                out[i, j] = div
                out[j, i] = div

    # calculate the diversity for each cohort
    for i in range(n_cohorts):
        n_pairs = an[i] * (an[i] - 1)
        n_same = 0
        for k in range(n_alleles):
            n_same += ac[i, k] * (ac[i, k] - 1)
        n_diff = n_pairs - n_same
        if n_pairs != 0.0:
            div = n_diff / n_pairs
            out[i, i] = div