def test_count_variant_alleles__chunked():
    """Allele counts must be the same for in-memory and chunked genotype calls.

    The counts are computed once on the dataset as built by ``get_dataset``
    and once after ``call_genotype`` has been re-chunked along every
    dimension; both results are required to be equal.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes the upper bound, so ``high=2`` is required to draw
    # both allele 0 and allele 1. With ``high=1`` every call would be 0 and
    # the comparison below would only ever exercise all-reference data.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    ac1 = count_variant_alleles(ds)
    # Coerce from numpy to multiple chunks in all dimensions
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, 5, 1))  # type: ignore[arg-type]
    ac2 = count_variant_alleles(ds)
    xr.testing.assert_equal(ac1, ac2)  # type: ignore[no-untyped-call]
def test_count_variant_alleles__chunked():
    """Allele counts must be the same for in-memory and chunked genotype calls.

    The counts are computed once on the dataset as built by ``get_dataset``
    and once after ``call_genotype`` has been re-chunked along every
    dimension. The chunked result must be backed by a ``da.Array`` and must
    compare equal to the first result.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes the upper bound, so ``high=2`` is required to draw
    # both allele 0 and allele 1. With ``high=1`` every call would be 0 and
    # the comparison below would only ever exercise all-reference data.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    ac1 = count_variant_alleles(ds)
    # Coerce from numpy to multiple chunks in all dimensions
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, 5, 1))
    ac2 = count_variant_alleles(ds)
    # Chunked input should not be silently materialised into a numpy array.
    assert isinstance(ac2["variant_allele_count"].data, da.Array)
    xr.testing.assert_equal(ac1, ac2)
def test_count_variant_alleles__missing_data():
    """Calls of -1 must not contribute to any allele count."""
    # Layout: one row per variant, one pair of calls per sample.
    genotypes = [
        [[-1, -1], [-1, -1], [-1, -1]],
        [[-1, -1], [0, 0], [-1, 1]],
        [[1, 1], [-1, -1], [-1, 0]],
        [[1, 1], [1, 1], [1, 1]],
    ]
    counted = count_variant_alleles(get_dataset(genotypes))
    observed = counted["variant_allele_count"]
    # One row per variant: [count of allele 0, count of allele 1].
    expected = np.array([[0, 0], [2, 1], [1, 2], [0, 6]])
    np.testing.assert_equal(observed, expected)
def test_count_variant_alleles__multi_variant_multi_sample():
    """Allele counts for four variants across three fully called samples."""
    # Layout: one row per variant, one pair of calls per sample.
    genotypes = [
        [[0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1]],
        [[1, 1], [0, 1], [1, 0]],
        [[1, 1], [1, 1], [1, 1]],
    ]
    counted = count_variant_alleles(get_dataset(genotypes))
    observed = counted["variant_allele_count"]
    # One row per variant: [count of allele 0, count of allele 1].
    expected = np.array([[6, 0], [5, 1], [2, 4], [0, 6]])
    np.testing.assert_equal(observed, expected)
def test_count_variant_alleles__higher_ploidy():
    """Allele counts with three calls per sample and four possible alleles."""
    # Layout: one row per variant, one triple of calls per sample.
    genotypes = [
        [[-1, -1, 0], [-1, -1, 1], [-1, -1, 2]],
        [[0, 1, 2], [1, 2, 3], [-1, -1, -1]],
    ]
    dataset = get_dataset(genotypes, n_allele=4, n_ploidy=3)
    counted = count_variant_alleles(dataset)
    observed = counted["variant_allele_count"]
    # One row per variant, one column per allele index 0..3.
    expected = np.array([[1, 1, 1, 0], [1, 2, 2, 1]])
    np.testing.assert_equal(observed, expected)
def test_sample_stats(precompute_variant_allele_count):
    """Check every per-sample statistic returned by ``sample_stats``.

    Args:
        precompute_variant_allele_count: When truthy, ``count_variant_alleles``
            is applied to the dataset before ``sample_stats`` is called. The
            value is supplied by the test harness (its source is not visible
            in this block).
    """
    # Layout: one row per variant, one pair of calls per sample (two samples).
    genotypes = [
        [[1, 0], [-1, -1]],
        [[1, 0], [1, 1]],
        [[0, 1], [1, 0]],
        [[-1, -1], [0, 0]],
    ]
    dataset = get_dataset(genotypes)
    if precompute_variant_allele_count:
        dataset = count_variant_alleles(dataset)
    stats = sample_stats(dataset)

    # Expected value per output variable, one entry per sample.
    expected_by_name = {
        "sample_n_called": [3, 3],
        "sample_call_rate": [0.75, 0.75],
        "sample_n_hom_ref": [0, 1],
        "sample_n_hom_alt": [0, 1],
        "sample_n_het": [3, 1],
        "sample_n_non_ref": [3, 2],
    }
    for name, expected in expected_by_name.items():
        np.testing.assert_equal(stats[name], np.array(expected))
def test_variant_stats(precompute_variant_allele_count):
    """Check every per-variant statistic returned by ``variant_stats``.

    Args:
        precompute_variant_allele_count: When truthy, ``count_variant_alleles``
            is applied to the dataset before ``variant_stats`` is called. The
            value is supplied by the test harness (its source is not visible
            in this block).
    """
    # Layout: one row per variant (four variants), one pair of calls per sample.
    genotypes = [
        [[1, 0], [-1, -1]],
        [[1, 0], [1, 1]],
        [[0, 1], [1, 0]],
        [[-1, -1], [0, 0]],
    ]
    dataset = get_dataset(genotypes)
    if precompute_variant_allele_count:
        dataset = count_variant_alleles(dataset)
    stats = variant_stats(dataset)

    # Expected value per output variable, one entry (or row) per variant.
    expected_by_name = {
        "variant_n_called": [1, 2, 2, 1],
        "variant_call_rate": [0.5, 1.0, 1.0, 0.5],
        "variant_n_hom_ref": [0, 0, 0, 1],
        "variant_n_hom_alt": [0, 1, 0, 0],
        "variant_n_het": [1, 1, 2, 0],
        "variant_n_non_ref": [1, 2, 2, 0],
        "variant_allele_count": [[1, 1], [1, 3], [2, 2], [2, 0]],
        "variant_allele_total": [2, 4, 4, 2],
        "variant_allele_frequency": [[0.5, 0.5], [0.25, 0.75], [0.5, 0.5], [1, 0]],
    }
    for name, expected in expected_by_name.items():
        np.testing.assert_equal(stats[name], np.array(expected))
def test_count_variant_alleles__single_variant_single_sample():
    """Default call keeps ``call_genotype`` in the result and counts one of each allele."""
    # One variant, one sample, calls 1 and 0.
    genotypes = [[[1, 0]]]
    counted = count_variant_alleles(get_dataset(genotypes))
    # The input variable must still be present alongside the new counts.
    assert "call_genotype" in counted
    observed = counted["variant_allele_count"]
    np.testing.assert_equal(observed, np.array([[1, 1]]))
def test_count_variant_alleles__no_merge():
    """With ``merge=False`` the result has the counts but not ``call_genotype``."""
    # One variant, one sample, calls 1 and 0.
    genotypes = [[[1, 0]]]
    counted = count_variant_alleles(get_dataset(genotypes), merge=False)
    # The input variable must be absent from the returned dataset.
    assert "call_genotype" not in counted
    observed = counted["variant_allele_count"]
    np.testing.assert_equal(observed, np.array([[1, 1]]))