Beispiel #1
0
 def test_patterson_f2(self):
     """Check allel.patterson_f2 against expected per-variant values."""
     # Allele counts for four variants in two populations; every row of
     # the second population is [0, 2].
     counts_a = [[0, 2], [2, 0], [1, 1], [0, 0]]
     counts_b = [[0, 2] for _ in range(4)]

     # The all-zero row of counts_a is expected to produce NaN.
     expected = [0., 1., 0., np.nan]

     assert_array_almost_equal(expected,
                               allel.patterson_f2(counts_a, counts_b))
Beispiel #2
0
def traditional_stats(data):
    """
    Calculates lots of (mostly) traditional statistics,
    that are summaries of the site frequency spectrum.

    Arguments
    ---------
    data: Named tuple of results (made by collate_results function).
        The fields read here are ``positions``, plus ``allele_counts`` and
        ``genotypes``, which are both indexed by population name.

    Returns
    ---------
    Nested dictionary of statistics, keyed first by statistic name and then
    by population name (one way and three way statistics) or by comparison
    name such as "domestic_wild" (two way statistics). "monomorphic_sites"
    and "f3" have no "all_pops" entry.
    """
    pop_names = ["domestic", "wild", "captive", "all_pops"]

    stats = {
        "sfs_mean": {},
        "diversity": {},
        "wattersons_theta": {},
        "tajimas_d": {},
        "observed_heterozygosity": {},
        "expected_heterozygosity": {},
        "segregating_sites": {},
        "monomorphic_sites": {},
        "roh_mean": {},
        "roh_iqr": {},
        "r2": {},
        "f3": {},
        "divergence": {},
        "fst": {},
        "f2": {},
    }

    for pop in pop_names:
        ac = data.allele_counts[pop]

        # One way statistics
        stats["sfs_mean"][pop] = binned_sfs_mean(ac)
        stats["diversity"][pop] = allel.sequence_diversity(
            data.positions, ac)
        stats["wattersons_theta"][pop] = allel.watterson_theta(
            data.positions, ac)
        # tajima_d takes the allele counts first, unlike the two calls above.
        stats["tajimas_d"][pop] = allel.tajima_d(ac, data.positions)
        stats["observed_heterozygosity"][pop] = allel.heterozygosity_observed(
            data.genotypes[pop]).mean()
        stats["expected_heterozygosity"][pop] = allel.heterozygosity_expected(
            ac.to_frequencies(), ploidy=2).mean()
        # Store the count under the population key; assigning straight to
        # stats["segregating_sites"] would replace the per-population dict
        # with a single number on every iteration.
        stats["segregating_sites"][pop] = ac.count_segregating()

        if pop == "all_pops":
            # all_pops has no monomorphic sites, and the three way
            # statistics below are only computed for individual populations.
            continue

        stats["monomorphic_sites"][pop] = ac.count_non_segregating()

        # Three way statistics: the current population against the two
        # remaining individual populations.
        other_pops = [
            pop_name for pop_name in pop_names
            if pop_name not in ["all_pops", pop]
        ]
        t, b = allel.patterson_f3(ac,
                                  data.allele_counts[other_pops[0]],
                                  data.allele_counts[other_pops[1]])
        # Ratio of sums over all variants, rather than a mean of ratios.
        stats["f3"][pop] = np.sum(t) / np.sum(b)

    # Two way statistics
    for comparison in ["domestic_wild", "domestic_captive", "wild_captive"]:
        first, second = comparison.split("_")
        ac_first = data.allele_counts[first]
        ac_second = data.allele_counts[second]

        stats["divergence"][comparison] = allel.sequence_divergence(
            data.positions, ac_first, ac_second)

        num, den = allel.hudson_fst(ac_first, ac_second)
        stats["fst"][comparison] = np.sum(num) / np.sum(den)
        stats["f2"][comparison] = allel.patterson_f2(
            ac_first, ac_second).mean()

    return stats