def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
    """Check windowed ``pbs`` results against scikit-allel for each cohort triple.

    A tree sequence of length 200 is simulated, converted to a dataset,
    annotated with cohorts, windowed with ``size=25`` and passed to ``pbs``
    with the given ``cohorts`` argument.  For every 3-combination of cohort
    indexes:

    * if ``cohort_indexes`` is given and the triple is not in it, the
      corresponding ``stat_pbs`` slice must be entirely NaN;
    * otherwise the slice (minus its last window) must match
      ``allel.pbs`` computed with ``window_size=25``.
    """
    cohort_keys = ["cohorts_0", "cohorts_1", "cohorts_2"]

    ts = simulate_ts(sample_size, length=200)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, _ = add_cohorts(ds, ts, n_cohorts, cohort_key_names=cohort_keys)  # type: ignore[no-untyped-call]
    ds = window(ds, size=25)
    ds = pbs(ds, cohorts=cohorts)

    # Reference implementation: scikit-allel
    for triple in itertools.combinations(range(n_cohorts), 3):
        # Builds {"cohorts_0": "co_<i>", "cohorts_1": "co_<j>", "cohorts_2": "co_<k>"}
        selector = {
            f"cohorts_{axis}": f"co_{cohort}" for axis, cohort in enumerate(triple)
        }
        observed = ds["stat_pbs"].sel(**selector).values

        if cohort_indexes is not None and triple not in cohort_indexes:
            # Triples that were not requested are expected to be all-NaN.
            np.testing.assert_array_equal(observed, np.full_like(observed, np.nan))
            continue

        ac_i, ac_j, ac_k = (
            ds.cohort_allele_count.values[:, cohort, :] for cohort in triple
        )
        expected = allel.pbs(ac_i, ac_j, ac_k, window_size=25)

        # scikit-allel has the final window missing, so drop ours before comparing
        np.testing.assert_allclose(observed[:-1], expected)
def test_pbs(sample_size, n_cohorts):
    """Check single-window ``pbs`` results against scikit-allel.

    The dataset is windowed by variant with a window as large as the number
    of variants (i.e. a single window), ``pbs`` is computed, and for every
    3-combination of cohort indexes the ``stat_pbs`` slice is compared with
    ``allel.pbs`` called with the same window size.

    NOTE(review): this file defines ``test_pbs`` a second time further down;
    the later definition replaces this one at import time, so this version
    would not be collected by pytest -- confirm which one should remain.
    """
    ts = simulate_ts(sample_size)
    ds = ts_to_dataset(ts)
    # Only the dataset is needed; the returned subsets are not used here.
    ds, _ = add_cohorts(
        ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]
    )

    # ``sizes`` is the name -> length mapping; using ``dims`` as a mapping is
    # deprecated in xarray.
    n_variants = ds.sizes["variants"]
    ds = window_by_variant(ds, size=n_variants)  # single window
    ds = pbs(ds)

    # Loop-invariant: materialise the allele counts once rather than per cohort.
    allele_counts = ds.cohort_allele_count.values

    # Reference implementation: scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        stat_pbs = (
            ds["stat_pbs"]
            .sel(cohorts_0=f"co_{i}", cohorts_1=f"co_{j}", cohorts_2=f"co_{k}")
            .values
        )

        ac_i = allele_counts[:, i, :]
        ac_j = allele_counts[:, j, :]
        ac_k = allele_counts[:, k, :]
        ska_pbs_value = allel.pbs(ac_i, ac_j, ac_k, window_size=n_variants)

        np.testing.assert_allclose(stat_pbs, ska_pbs_value)
def test_pbs(sample_size, n_cohorts):
    """Check single-window ``pbs`` results against scikit-allel.

    A tree sequence is simulated with ``msprime.simulate`` (fixed
    ``random_seed=42``), converted to a dataset, annotated with cohorts and
    placed in one window spanning all variants.  For every 3-combination of
    cohort indexes the ``stat_pbs`` slice is compared with ``allel.pbs``
    called with the same window size.

    NOTE(review): an earlier ``test_pbs`` is defined above in this file; this
    later definition replaces it at import time -- confirm that shadowing the
    earlier one is intended.
    """
    ts = msprime.simulate(sample_size, length=100, mutation_rate=0.05, random_seed=42)
    ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]

    cohort_key_names = ["cohorts_0", "cohorts_1", "cohorts_2"]
    # Only the dataset is needed; the returned subsets are not used here.
    ds, _ = add_cohorts(ds, ts, n_cohorts, cohort_key_names=cohort_key_names)  # type: ignore[no-untyped-call]

    # ``sizes`` is the name -> length mapping; using ``dims`` as a mapping is
    # deprecated in xarray.
    n_variants = ds.sizes["variants"]
    ds = window(ds, size=n_variants)  # single window
    ds = pbs(ds)

    # Loop-invariant: materialise the allele counts once rather than per cohort.
    allele_counts = ds.cohort_allele_count.values

    # Reference implementation: scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        stat_pbs = (
            ds["stat_pbs"]
            .sel(cohorts_0=f"co_{i}", cohorts_1=f"co_{j}", cohorts_2=f"co_{k}")
            .values
        )

        ac_i = allele_counts[:, i, :]
        ac_j = allele_counts[:, j, :]
        ac_k = allele_counts[:, k, :]
        ska_pbs_value = allel.pbs(ac_i, ac_j, ac_k, window_size=n_variants)

        np.testing.assert_allclose(stat_pbs, ska_pbs_value)