Ejemplo n.º 1
0
def test_count_variant_alleles__chunked():
    """Check that allele counts match for in-memory and chunked genotype calls.

    A random ``(50, 10, 2)`` genotype array is counted once as built by
    ``get_dataset`` and once after ``call_genotype`` has been re-chunked along
    every dimension; both results must compare equal.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes its upper bound, so ``randint(0, 1)`` would produce
    # nothing but zeros and the comparison below would only ever see a
    # constant array. Draw from [0, 2) so both allele 0 and allele 1 occur.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    ac1 = count_variant_alleles(ds)
    # Coerce from numpy to multiple chunks in all dimensions
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, 5, 1))  # type: ignore[arg-type]
    ac2 = count_variant_alleles(ds)
    xr.testing.assert_equal(ac1, ac2)  # type: ignore[no-untyped-call]
Ejemplo n.º 2
0
def test_count_variant_alleles__chunked():
    """Check that allele counts match for in-memory and chunked genotype calls.

    A random ``(50, 10, 2)`` genotype array is counted once as built by
    ``get_dataset`` and once after ``call_genotype`` has been re-chunked along
    every dimension. The chunked run must yield a dask-backed
    ``variant_allele_count`` and both results must compare equal.
    """
    rs = np.random.RandomState(0)
    # ``randint`` excludes its upper bound, so ``randint(0, 1)`` would produce
    # nothing but zeros and the comparison below would only ever see a
    # constant array. Draw from [0, 2) so both allele 0 and allele 1 occur.
    calls = rs.randint(0, 2, size=(50, 10, 2))
    ds = get_dataset(calls)
    ac1 = count_variant_alleles(ds)
    # Coerce from numpy to multiple chunks in all dimensions
    ds["call_genotype"] = ds["call_genotype"].chunk(chunks=(5, 5, 1))
    ac2 = count_variant_alleles(ds)
    # The chunked input should keep the result as a dask array.
    assert isinstance(ac2["variant_allele_count"].data, da.Array)
    xr.testing.assert_equal(ac1, ac2)
Ejemplo n.º 3
0
def test_count_variant_alleles__missing_data():
    """Check allele counts when some calls are -1.

    The expected counts only tally the 0 and 1 entries of each variant row;
    the -1 entries (treated as missing by this test) contribute nothing.
    """
    genotypes = [
        [[-1, -1], [-1, -1], [-1, -1]],
        [[-1, -1], [0, 0], [-1, 1]],
        [[1, 1], [-1, -1], [-1, 0]],
        [[1, 1], [1, 1], [1, 1]],
    ]
    expected = np.array([[0, 0], [2, 1], [1, 2], [0, 6]])

    counted = count_variant_alleles(get_dataset(genotypes))

    np.testing.assert_equal(counted["variant_allele_count"], expected)
Ejemplo n.º 4
0
def test_count_variant_alleles__multi_variant_multi_sample():
    """Check per-variant allele counts for four variants across three samples.

    Every call is either 0 or 1, so each expected row sums to six alleles.
    """
    genotypes = [
        [[0, 0], [0, 0], [0, 0]],
        [[0, 0], [0, 0], [0, 1]],
        [[1, 1], [0, 1], [1, 0]],
        [[1, 1], [1, 1], [1, 1]],
    ]
    expected = np.array([[6, 0], [5, 1], [2, 4], [0, 6]])

    counted = count_variant_alleles(get_dataset(genotypes))

    np.testing.assert_equal(counted["variant_allele_count"], expected)
Ejemplo n.º 5
0
def test_count_variant_alleles__higher_ploidy():
    """Check allele counts for triploid calls over four alleles.

    The dataset is built with ``n_allele=4`` and ``n_ploidy=3``; -1 entries
    are not reflected in the expected counts.
    """
    genotypes = [
        [[-1, -1, 0], [-1, -1, 1], [-1, -1, 2]],
        [[0, 1, 2], [1, 2, 3], [-1, -1, -1]],
    ]
    expected = np.array([[1, 1, 1, 0], [1, 2, 2, 1]])

    dataset = get_dataset(genotypes, n_allele=4, n_ploidy=3)
    counted = count_variant_alleles(dataset)

    np.testing.assert_equal(counted["variant_allele_count"], expected)
Ejemplo n.º 6
0
def test_sample_stats(precompute_variant_allele_count):
    """Check the per-sample summary statistics returned by ``sample_stats``.

    Args:
        precompute_variant_allele_count: When truthy, ``count_variant_alleles``
            is applied to the dataset before ``sample_stats`` is called.
    """
    genotypes = [
        [[1, 0], [-1, -1]],
        [[1, 0], [1, 1]],
        [[0, 1], [1, 0]],
        [[-1, -1], [0, 0]],
    ]
    dataset = get_dataset(genotypes)
    if precompute_variant_allele_count:
        dataset = count_variant_alleles(dataset)
    stats = sample_stats(dataset)

    # One expected value per sample, checked in insertion order.
    expected_by_name = {
        "sample_n_called": np.array([3, 3]),
        "sample_call_rate": np.array([0.75, 0.75]),
        "sample_n_hom_ref": np.array([0, 1]),
        "sample_n_hom_alt": np.array([0, 1]),
        "sample_n_het": np.array([3, 1]),
        "sample_n_non_ref": np.array([3, 2]),
    }
    for name, expected in expected_by_name.items():
        np.testing.assert_equal(stats[name], expected)
Ejemplo n.º 7
0
def test_variant_stats(precompute_variant_allele_count):
    """Check the per-variant summary statistics returned by ``variant_stats``.

    Args:
        precompute_variant_allele_count: When truthy, ``count_variant_alleles``
            is applied to the dataset before ``variant_stats`` is called.
    """
    genotypes = [
        [[1, 0], [-1, -1]],
        [[1, 0], [1, 1]],
        [[0, 1], [1, 0]],
        [[-1, -1], [0, 0]],
    ]
    dataset = get_dataset(genotypes)
    if precompute_variant_allele_count:
        dataset = count_variant_alleles(dataset)
    stats = variant_stats(dataset)

    # One expected entry per variant, checked in insertion order.
    expected_by_name = {
        "variant_n_called": np.array([1, 2, 2, 1]),
        "variant_call_rate": np.array([0.5, 1.0, 1.0, 0.5]),
        "variant_n_hom_ref": np.array([0, 0, 0, 1]),
        "variant_n_hom_alt": np.array([0, 1, 0, 0]),
        "variant_n_het": np.array([1, 1, 2, 0]),
        "variant_n_non_ref": np.array([1, 2, 2, 0]),
        "variant_allele_count": np.array([[1, 1], [1, 3], [2, 2], [2, 0]]),
        "variant_allele_total": np.array([2, 4, 4, 2]),
        "variant_allele_frequency": np.array(
            [[0.5, 0.5], [0.25, 0.75], [0.5, 0.5], [1, 0]]
        ),
    }
    for name, expected in expected_by_name.items():
        np.testing.assert_equal(stats[name], expected)
Ejemplo n.º 8
0
def test_count_variant_alleles__single_variant_single_sample():
    """Check counting on one variant and one sample with default arguments.

    The result must still contain ``call_genotype`` and report one copy of
    each allele for the single ``[1, 0]`` call.
    """
    genotypes = [[[1, 0]]]
    result = count_variant_alleles(get_dataset(genotypes))

    assert "call_genotype" in result
    expected = np.array([[1, 1]])
    np.testing.assert_equal(result["variant_allele_count"], expected)
Ejemplo n.º 9
0
def test_count_variant_alleles__no_merge():
    """Check that ``merge=False`` leaves the input variables out of the result.

    The result must not contain ``call_genotype`` but must still report one
    copy of each allele for the single ``[1, 0]`` call.
    """
    genotypes = [[[1, 0]]]
    result = count_variant_alleles(get_dataset(genotypes), merge=False)

    assert "call_genotype" not in result
    expected = np.array([[1, 1]])
    np.testing.assert_equal(result["variant_allele_count"], expected)