def test_diversity__windowed(sample_size):
    """Check windowed ``diversity`` against tskit and scikit-allel.

    The dataset is split into windows of a fixed number of variants, and the
    per-window diversity for cohort ``co_0`` is compared with:

    * ``ts.diversity`` evaluated over genomic windows whose breakpoints are
      placed at every 25th variant position, and
    * scikit-allel's ``moving_statistic`` summing mean pairwise differences.
    """
    variants_per_window = 25

    ts = simulate_ts(sample_size, length=200)
    dataset = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
    dataset, _ = add_cohorts(dataset, ts, cohort_key_names=["cohorts"])  # type: ignore[no-untyped-call]
    dataset = window(dataset, size=variants_per_window)
    dataset = diversity(dataset)
    div = dataset["stat_diversity"].sel(cohorts="co_0").compute()

    # Reference 1: tskit windows.
    # Derive the breakpoints from the variant positions so that every window
    # holds a fixed number of variants.
    site_positions = ts.tables.sites.position
    inner_breakpoints = site_positions[::variants_per_window][1:]
    breakpoints = np.concatenate(([0], inner_breakpoints, [ts.sequence_length]))
    expected_tskit = ts.diversity(windows=breakpoints, span_normalise=False)
    np.testing.assert_allclose(div, expected_tskit)

    # Reference 2: scikit-allel moving_statistic.
    # (windowed_diversity is avoided since it treats the last window differently.)
    counts_ds = count_variant_alleles(ts_to_dataset(ts))  # type: ignore[no-untyped-call]
    allele_counts = counts_ds["variant_allele_count"].values
    pairwise_diff = allel.mean_pairwise_difference(allele_counts, fill=0)
    expected_allel = allel.moving_statistic(
        pairwise_diff, np.sum, size=variants_per_window
    )
    # scikit-allel has the final window missing, so drop ours before comparing.
    np.testing.assert_allclose(div[:-1], expected_allel)
def test_diversity(sample_size, chunks, cohort_allele_count):
    """Check that summed per-variant ``diversity`` matches ``ts.diversity``.

    When ``cohort_allele_count`` is not ``None``, the cohort allele counts are
    computed up front, renamed to that variable name, and the name is passed
    through to ``diversity``; otherwise ``diversity`` is called with defaults.
    """
    # NOTE(review): `test_diversity` is defined a second time further down in
    # this module; the later definition rebinds the name, so presumably only
    # that one is collected by pytest -- confirm which version is intended.
    ts = simulate_ts(sample_size)
    dataset = ts_to_dataset(ts, chunks)
    dataset, _ = add_cohorts(dataset, ts, cohort_key_names=["cohorts"])

    extra_kwargs = {}
    if cohort_allele_count is not None:
        counted = count_cohort_alleles(dataset, merge=False)
        dataset = counted.rename(
            {variables.cohort_allele_count: cohort_allele_count}
        )
        extra_kwargs["cohort_allele_count"] = cohort_allele_count

    # Label the cohorts dimension so the result can be selected by name.
    dataset = dataset.assign_coords({"cohorts": ["co_0"]})
    dataset = diversity(dataset, **extra_kwargs)

    summed = dataset["stat_diversity"].sum(axis=0, skipna=False)
    div = summed.sel(cohorts="co_0").values
    expected = ts.diversity(span_normalise=False)
    np.testing.assert_allclose(div, expected)
def test_diversity(sample_size, chunks, cohort_allele_count):
    """Check that summed per-variant ``diversity`` matches ``ts.diversity``.

    The tree sequence is simulated directly with ``msprime.simulate`` using a
    fixed seed. When ``cohort_allele_count`` is not ``None``, the cohort
    allele counts are computed up front, renamed to that variable name, and
    the name is passed through to ``diversity``; otherwise ``diversity`` is
    called with defaults.
    """
    # NOTE(review): an earlier `test_diversity` exists in this module; this
    # definition rebinds the name, so presumably only this one is collected
    # by pytest -- confirm which version is intended.
    ts = msprime.simulate(
        sample_size, length=100, mutation_rate=0.05, random_seed=42
    )
    dataset = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    dataset, _ = add_cohorts(dataset, ts, cohort_key_names=["cohorts"])  # type: ignore[no-untyped-call]

    extra_kwargs = {}
    if cohort_allele_count is not None:
        counted = count_cohort_alleles(dataset, merge=False)
        dataset = counted.rename(
            {variables.cohort_allele_count: cohort_allele_count}
        )
        extra_kwargs["cohort_allele_count"] = cohort_allele_count

    # Label the cohorts dimension so the result can be selected by name.
    dataset = dataset.assign_coords({"cohorts": ["co_0"]})
    dataset = diversity(dataset, **extra_kwargs)

    summed = dataset["stat_diversity"].sum(axis=0, skipna=False)
    div = summed.sel(cohorts="co_0").values
    expected = ts.diversity(span_normalise=False)
    np.testing.assert_allclose(div, expected)
def test_diversity__missing_call_genotype():
    """``diversity`` on an empty dataset must raise a ``ValueError``.

    The error message is expected to match "call_genotype not present".
    """
    empty_dataset = xr.Dataset()
    expected_message = "call_genotype not present"
    with pytest.raises(ValueError, match=expected_message):
        diversity(empty_dataset)