def test_patterson_f3(self):
    """Compare both outputs of ``allel.patterson_f3`` with hand-written values.

    Five variants are described by two-column allele counts for three
    populations; the call passes the ``c`` counts first, then ``a`` and ``b``.
    """
    counts_a = [[0, 2], [2, 0], [0, 2], [0, 2], [0, 0]]
    counts_b = [[2, 0], [0, 2], [0, 2], [0, 2], [0, 2]]
    counts_c = [[1, 1], [1, 1], [0, 2], [2, 0], [1, 1]]

    f3_result, hzc_result = allel.patterson_f3(counts_c, counts_a, counts_b)

    # The last variant has all-zero counts in population a ([0, 0]); the
    # expected first output there is NaN (presumably because no frequency
    # can be formed for that population).
    assert_array_almost_equal([-.5, -.5, 0., 1., np.nan], f3_result)
    assert_array_almost_equal([1., 1., 0., 0., 1.], hzc_result)
def traditional_stats(data):
    """
    Calculates lots of (mostly) traditional statistics,
    that are summaries of the site frequency spectrum.

    Arguments
    ---------
    data: Named tuple of results (made by collate_results function).
        The fields read here are ``data.positions``, ``data.allele_counts``
        and ``data.genotypes``; the latter two are indexed by population
        name ("domestic", "wild", "captive", "all_pops").

    Returns
    ---------
    Nested dictionary of statistics. The outer key is the statistic name.
    The inner key is a population name for the one-way and three-way
    statistics, or a comparison name ("domestic_wild", "domestic_captive",
    "wild_captive") for the two-way statistics.
    """
    pop_names = ["domestic", "wild", "captive", "all_pops"]
    comparisons = ["domestic_wild", "domestic_captive", "wild_captive"]

    stats = {
        "sfs_mean": {},
        "diversity": {},
        "wattersons_theta": {},
        "tajimas_d": {},
        "observed_heterozygosity": {},
        "expected_heterozygosity": {},
        "segregating_sites": {},
        "monomorphic_sites": {},
        "roh_mean": {},
        "roh_iqr": {},
        "r2": {},
        "f3": {},
        "divergence": {},
        "fst": {},
        "f2": {},
    }

    for pop in pop_names:
        counts = data.allele_counts[pop]

        # One way statistics
        stats["sfs_mean"][pop] = binned_sfs_mean(counts)
        stats["diversity"][pop] = allel.sequence_diversity(
            data.positions, counts)
        stats["wattersons_theta"][pop] = allel.watterson_theta(
            data.positions, counts)
        stats["tajimas_d"][pop] = allel.tajima_d(counts, data.positions)
        stats["observed_heterozygosity"][pop] = allel.heterozygosity_observed(
            data.genotypes[pop]).mean()
        stats["expected_heterozygosity"][pop] = allel.heterozygosity_expected(
            counts.to_frequencies(), ploidy=2).mean()
        # Keyed by population like every other one-way statistic, so the
        # per-population sub-dictionary is filled rather than replaced by a
        # single count.
        stats["segregating_sites"][pop] = counts.count_segregating()

        if pop != "all_pops":  # all_pops has no monomorphic sites
            stats["monomorphic_sites"][pop] = counts.count_non_segregating()

            # Three way statistics
            # NOTE(review): f3 is computed only for the three individual
            # populations, each against the other two; confirm that no
            # "all_pops" entry is expected under stats["f3"].
            other_pops = [
                pop_name for pop_name in pop_names
                if pop_name not in ["all_pops", pop]
            ]
            t, b = allel.patterson_f3(counts,
                                      data.allele_counts[other_pops[0]],
                                      data.allele_counts[other_pops[1]])
            # Ratio of the summed first output to the summed second output.
            stats["f3"][pop] = np.sum(t) / np.sum(b)

    # Two way statistics
    for comparison in comparisons:
        p = comparison.split("_")
        counts_first = data.allele_counts[p[0]]
        counts_second = data.allele_counts[p[1]]

        stats["divergence"][comparison] = allel.sequence_divergence(
            data.positions, counts_first, counts_second)
        num, den = allel.hudson_fst(counts_first, counts_second)
        # Ratio of sums over the per-variant numerators and denominators.
        stats["fst"][comparison] = np.sum(num) / np.sum(den)
        stats["f2"][comparison] = allel.patterson_f2(
            counts_first, counts_second).mean()

    return stats