def test_count_cohort_alleles__chunked():
    """Check that chunked genotype input gives the same cohort allele counts.

    Counts are computed once on the dataset as returned by ``get_dataset`` and
    once after ``call_genotype`` has been re-chunked along the variants
    dimension. The second result must be backed by a dask array and must be
    equal to the first.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes the upper bound: ``high=2`` is needed to draw both
    # allele 0 and allele 1. With ``high=1`` every call would be 0 and the
    # comparison below would never exercise non-reference allele counts.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    # Split the samples into two equally sized cohorts (0 and 1). ``sizes`` is
    # the dimension-name -> length mapping; mapping-style access through
    # ``Dataset.dims`` is deprecated in xarray.
    sample_cohort = np.repeat([0, 1], ds.sizes["samples"] // 2)
    ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
    ac1 = count_cohort_alleles(ds)
    # Coerce from numpy to multiple chunks in variants dimension only
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, -1, -1))
    ac2 = count_cohort_alleles(ds)
    assert isinstance(ac2["cohort_allele_count"].data, da.Array)
    xr.testing.assert_equal(ac1, ac2)
def test_cohort_allele_frequencies__polyploid(chunks):
    """Verify per-cohort allele frequencies for ploidy-4 calls with 3 alleles.

    ``chunks`` is forwarded to ``.chunk`` on the ``cohort_allele_count``
    variable before frequencies are derived. It is supplied from outside this
    block (presumably a pytest parametrization or fixture -- confirm).
    """
    # Layout: variants x samples x ploidy. The negative entries (-1 and -2)
    # do not contribute to any of the expected frequencies below.
    genotypes = [
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, -2, -2]],
        [[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, -2, -2]],
        [[1, 1, 0, 1], [1, 0, 0, 0], [2, 2, -2, -2]],
        [[1, -1, 1, 1], [-1, -1, 0, 0], [0, 0, -2, -2]],
    ]
    dataset = get_dataset(genotypes, n_ploidy=4, n_allele=3)
    # First and third samples form cohort 0; the second sample is cohort 1.
    dataset["sample_cohort"] = "samples", [0, 1, 0]

    # Materialise the counts, then re-chunk them so the frequency computation
    # runs against the requested chunking.
    counted = count_cohort_alleles(dataset).compute()
    counted["cohort_allele_count"] = counted["cohort_allele_count"].chunk(chunks)
    observed = cohort_allele_frequencies(counted)["cohort_allele_frequency"]

    # Layout: variants x cohorts x alleles.
    expected = np.array(
        [
            [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
            [[5 / 6, 1 / 6, 0.0], [0.75, 0.25, 0.0]],
            [[1 / 6, 0.5, 1 / 3], [0.75, 0.25, 0.0]],
            [[0.4, 0.6, 0.0], [1.0, 0.0, 0.0]],
        ]
    )
    np.testing.assert_equal(observed, expected)
def test_count_cohort_alleles__multi_variant_multi_sample():
    """Verify cohort allele counts for four variants and four diploid samples.

    One sample is assigned to cohort 0, two to cohort 1 and one to no cohort,
    so the expected counts cover cohorts of different sizes as well as an
    excluded sample.
    """
    # Layout: variants x samples x ploidy.
    genotypes = [
        [[0, 0], [0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1], [0, 1]],
        [[1, 1], [0, 1], [1, 0], [1, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1]],
    ]
    dataset = get_dataset(genotypes)

    # -1 means that the sample is not in any cohort
    cohort_ids = np.array([0, 1, 1, -1])
    dataset["sample_cohort"] = xr.DataArray(cohort_ids, dims="samples")

    observed = count_cohort_alleles(dataset).cohort_allele_count

    # Layout: variants x cohorts x alleles.
    expected = np.array(
        [
            [[2, 0], [4, 0]],
            [[2, 0], [3, 1]],
            [[0, 2], [2, 2]],
            [[0, 2], [0, 4]],
        ]
    )
    np.testing.assert_equal(observed, expected)
def test_cohort_allele_frequencies__diploid(chunks):
    """Verify per-cohort allele frequencies for diploid calls with two cohorts.

    ``chunks`` is forwarded to ``.chunk`` on the ``cohort_allele_count``
    variable before frequencies are derived. It is supplied from outside this
    block (presumably a pytest parametrization or fixture -- confirm).
    """
    # Layout: variants x samples x ploidy. The -1 entries do not contribute
    # to any of the expected frequencies below.
    genotypes = [
        [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0]],
        [[1, 1], [0, 1], [1, 0], [0, 0], [0, 0], [1, 1]],
        [[1, -1], [1, 1], [-1, -1], [0, 0], [0, 0], [-1, -1]],
    ]
    dataset = get_dataset(genotypes)
    # The fifth sample carries cohort id -1 and is absent from both expected
    # cohorts.
    dataset["sample_cohort"] = "samples", [1, 0, 0, 1, -1, 1]

    # Materialise the counts, then re-chunk them so the frequency computation
    # runs against the requested chunking.
    counted = count_cohort_alleles(dataset).compute()
    counted["cohort_allele_count"] = counted["cohort_allele_count"].chunk(chunks)
    observed = cohort_allele_frequencies(counted)["cohort_allele_frequency"]

    # Layout: variants x cohorts x alleles.
    expected = np.array(
        [
            [[1.0, 0.0], [1.0, 0.0]],
            [[0.75, 0.25], [1.0, 0.0]],
            [[0.5, 0.5], [1 / 3, 2 / 3]],
            [[0.0, 1.0], [2 / 3, 1 / 3]],
        ]
    )
    np.testing.assert_equal(observed, expected)