예제 #1
0
def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes,
                       chunks):
    """Compare windowed ``pbs`` output with scikit-allel's ``allel.pbs``.

    A tree sequence is simulated, converted to a dataset (using ``chunks``),
    labelled with cohorts and windowed with size 25 before ``pbs`` is run
    for the requested ``cohorts``.  Every combination of three cohort
    indexes is then checked:

    * if ``cohort_indexes`` is given and does not contain the triple, the
      matching ``stat_pbs`` slice must consist solely of NaN;
    * otherwise the slice, minus its final window, must be close to the
      value scikit-allel computes from the same cohort allele counts.

    The arguments are presumably provided by pytest parametrization that is
    not visible in this excerpt.
    """
    # One window size shared by the dataset windowing and scikit-allel.
    window_size = 25

    ts = simulate_ts(sample_size, length=200)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, _ = add_cohorts(  # type: ignore[no-untyped-call]
        ds,
        ts,
        n_cohorts,
        cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"],
    )
    ds = pbs(window(ds, size=window_size), cohorts=cohorts)

    # Cross-check every cohort triple against scikit-allel.
    for triple in itertools.combinations(range(n_cohorts), 3):
        i, j, k = triple
        observed = ds["stat_pbs"].sel(
            cohorts_0=f"co_{i}",
            cohorts_1=f"co_{j}",
            cohorts_2=f"co_{k}",
        ).values

        not_requested = (cohort_indexes is not None
                         and triple not in cohort_indexes)
        if not_requested:
            # Triples that were not requested must be left entirely as NaN.
            all_nan = np.full_like(observed, np.nan)
            np.testing.assert_array_equal(observed, all_nan)
            continue

        per_cohort_counts = [
            ds.cohort_allele_count.values[:, cohort, :] for cohort in triple
        ]
        expected = allel.pbs(*per_cohort_counts, window_size=window_size)

        # scikit-allel has final window missing
        np.testing.assert_allclose(observed[:-1], expected)
예제 #2
0
파일: test_popgen.py 프로젝트: hammer/sgkit
def test_pbs(sample_size, n_cohorts):
    """Compare single-window ``pbs`` output with scikit-allel's ``allel.pbs``.

    A tree sequence is simulated, converted to a dataset and labelled with
    cohorts.  All variants are placed in one window, ``pbs`` is run, and for
    every combination of three cohort indexes the selected ``stat_pbs``
    values must be close to what scikit-allel computes from the same cohort
    allele counts with ``window_size`` equal to the number of variants.

    The arguments are presumably provided by pytest parametrization that is
    not visible in this excerpt.
    """
    ts = simulate_ts(sample_size)
    ds = ts_to_dataset(ts)
    # Only the dataset is needed; the second return value is discarded.
    ds, _ = add_cohorts(
        ds,
        ts,
        n_cohorts,
        cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"],
    )
    # ``Dataset.sizes`` is the name -> length mapping; looking a length up
    # through ``Dataset.dims`` is deprecated in xarray.
    n_variants = ds.sizes["variants"]
    ds = window_by_variant(ds, size=n_variants)  # single window

    ds = pbs(ds)

    # Materialise the allele counts once: the array is the same for every
    # cohort triple, so there is no reason to re-evaluate it in the loop.
    allele_counts = ds.cohort_allele_count.values

    # scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        stat_pbs = ds["stat_pbs"].sel(
            cohorts_0=f"co_{i}",
            cohorts_1=f"co_{j}",
            cohorts_2=f"co_{k}",
        ).values

        ska_pbs_value = allel.pbs(
            allele_counts[:, i, :],
            allele_counts[:, j, :],
            allele_counts[:, k, :],
            window_size=n_variants,
        )

        np.testing.assert_allclose(stat_pbs, ska_pbs_value)
예제 #3
0
def test_pbs(sample_size, n_cohorts):
    """Compare single-window ``pbs`` output with scikit-allel's ``allel.pbs``.

    A tree sequence is simulated with msprime (length 100, mutation rate
    0.05, fixed seed 42), converted to a dataset and labelled with cohorts.
    All variants are placed in one window, ``pbs`` is run, and for every
    combination of three cohort indexes the selected ``stat_pbs`` values
    must be close to what scikit-allel computes from the same cohort allele
    counts with ``window_size`` equal to the number of variants.

    The arguments are presumably provided by pytest parametrization that is
    not visible in this excerpt.
    """
    # NOTE(review): ``msprime.simulate`` is the legacy msprime interface; it
    # is kept unchanged because another simulator call would alter the
    # generated data this test relies on.
    ts = msprime.simulate(
        sample_size,
        length=100,
        mutation_rate=0.05,
        random_seed=42,
    )
    ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
    # Only the dataset is needed; the second return value is discarded.
    ds, _ = add_cohorts(  # type: ignore[no-untyped-call]
        ds,
        ts,
        n_cohorts,
        cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"],
    )
    # ``Dataset.sizes`` is the name -> length mapping; looking a length up
    # through ``Dataset.dims`` is deprecated in xarray.
    n_variants = ds.sizes["variants"]
    ds = window(ds, size=n_variants)  # single window

    ds = pbs(ds)

    # Materialise the allele counts once: the array is the same for every
    # cohort triple, so there is no reason to re-evaluate it in the loop.
    allele_counts = ds.cohort_allele_count.values

    # scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        stat_pbs = ds["stat_pbs"].sel(
            cohorts_0=f"co_{i}",
            cohorts_1=f"co_{j}",
            cohorts_2=f"co_{k}",
        ).values

        ska_pbs_value = allel.pbs(
            allele_counts[:, i, :],
            allele_counts[:, j, :],
            allele_counts[:, k, :],
            window_size=n_variants,
        )

        np.testing.assert_allclose(stat_pbs, ska_pbs_value)