Esempio n. 1
0
def test_Garud_h(n_variants, n_samples, n_contigs, n_cohorts, cohorts,
                 cohort_indexes, chunks):
    """Check the Garud H statistics against scikit-allel, cohort by cohort.

    A simulated genotype-call dataset is chunked along ``variants`` and
    ``samples``, its samples are split into ``n_cohorts`` groups that are
    labelled 0..n_cohorts-1 in ``sample_cohort``, and the dataset is passed
    through ``window(ds, size=3)`` before ``Garud_H`` is called.

    For every cohort the four ``stat_Garud_*`` columns must either match
    ``allel.moving_garud_h`` computed on that cohort's haplotypes, or, when
    ``cohort_indexes`` is given and does not contain the cohort, consist
    entirely of NaN.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization defined outside this block -- confirm.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=n_variants,
        n_sample=n_samples,
        n_contig=n_contigs,
    )
    chunk_spec = dict(zip(["variants", "samples"], chunks))
    dataset = dataset.chunk(chunk_spec)

    # Label each sample with the index of the group it falls into.
    sample_groups = np.array_split(dataset.samples.values, n_cohorts)
    labels_per_group = [
        np.full_like(group, label) for label, group in enumerate(sample_groups)
    ]
    sample_cohorts = np.concatenate(labels_per_group)
    dataset["sample_cohort"] = xr.DataArray(sample_cohorts, dims="samples")

    cohort_names = [f"co_{idx}" for idx in range(n_cohorts)]
    dataset = dataset.assign_coords({"cohorts": cohort_names})  # type: ignore[no-untyped-call]
    dataset = window(dataset, size=3)

    result = Garud_H(dataset, cohorts=cohorts)
    # Kept in the same order as the tuple returned by allel.moving_garud_h
    # so that position i here is compared with element i of that tuple.
    observed_stats = (
        result.stat_Garud_h1.values,
        result.stat_Garud_h12.values,
        result.stat_Garud_h123.values,
        result.stat_Garud_h2_h1.values,
    )

    # scikit-allel
    for cohort in range(n_cohorts):
        was_skipped = (cohort_indexes is not None
                       and cohort not in cohort_indexes)
        if was_skipped:
            # cohorts that were not computed should be nan
            for stat in observed_stats:
                column = stat[:, cohort]
                np.testing.assert_array_equal(
                    column, np.full_like(column, np.nan))
            continue

        in_cohort = sample_cohorts == cohort
        genotypes = dataset.call_genotype.values[:, in_cohort, :]
        haplotypes = allel.GenotypeArray(genotypes).to_haplotypes()
        expected_stats = allel.moving_garud_h(haplotypes, size=3)

        for position, stat in enumerate(observed_stats):
            np.testing.assert_allclose(stat[:, cohort],
                                       expected_stats[position])
Esempio n. 2
0
def test_Garud_h__raise_on_no_windows():
    """Garud_H must raise ValueError for a dataset that was never windowed."""
    unwindowed = simulate_genotype_call_dataset(n_variant=10, n_sample=10)
    expected_message = "Dataset must be windowed for Garud_H"

    with pytest.raises(ValueError, match=expected_message):
        Garud_H(unwindowed)
Esempio n. 3
0
def test_Garud_h__raise_on_non_diploid():
    """Garud_H must raise NotImplementedError for a ploidy-3 dataset."""
    triploid = simulate_genotype_call_dataset(
        n_variant=10,
        n_sample=10,
        n_ploidy=3,
    )
    expected_message = "Garud H only implemented for diploid genotypes"

    with pytest.raises(NotImplementedError, match=expected_message):
        Garud_H(triploid)