Ejemplo n.º 1
0
def test_blockcut_sensitivities(tmp_path):
    """ Ensure that the block cut sets are monotone to the sensitivity"""

    results = []
    for s in range(6):
        outvcf = tmp_path / "output{}.vcf".format(s)
        run_polyphase(
            phase_input_files=["tests/data/polyploid.chr22.42M.12k.bam"],
            variant_file="tests/data/polyploid.chr22.42M.12k.vcf",
            ploidy=4,
            ignore_read_groups=True,
            block_cut_sensitivity=s,
            output=outvcf,
        )
        assert os.path.isfile(outvcf)

        tables = list(VcfReader(outvcf, phases=True))
        assert len(tables) == 1
        block_starts = set([
            i.block_id for i in tables[0].phases_of("HG00514_NA19240")
            if i is not None
        ])
        results.append(block_starts)
        print(block_starts)

    for s in range(5):
        assert all(cut in results[s + 1] for cut in results[s])
Ejemplo n.º 2
0
def test_wrong_ploidy(tmp_path):
    outvcf = tmp_path / "output.vcf"
    with raises(CommandLineError):
        run_polyphase(
            phase_input_files=["tests/data/polyploid.chr22.42M.12k.bam"],
            variant_file="tests/data/polyploid.chr22.42M.12k.vcf",
            ploidy=3,
            ignore_read_groups=True,
            output=outvcf,
        )
Ejemplo n.º 3
0
def test_polyphase_short_chr22(tmp_path):
    outvcf = tmp_path / "output.vcf"
    run_polyphase(
        phase_input_files=["tests/data/polyploid.chr22.42M.12k.bam"],
        variant_file="tests/data/polyploid.chr22.42M.12k.vcf",
        ploidy=4,
        ignore_read_groups=True,
        output=outvcf,
    )
    assert os.path.isfile(outvcf)

    tables = list(VcfReader(outvcf, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert len(table.variants) == 42
    assert table.samples == ["HG00514_NA19240"]
Ejemplo n.º 4
0
def test_polyphase_multiple_bam(tmp_path):
    outvcf = tmp_path / "output.vcf"
    run_polyphase(
        phase_input_files=[
            "tests/data/polyploid.human1.chr22.42M.5k.bam",
            "tests/data/polyploid.human2.chr22.42M.5k.bam",
        ],
        variant_file="tests/data/polyploid.multisample.chr22.42M.5k.vcf",
        ploidy=2,
        ignore_read_groups=False,
        output=outvcf,
    )
    assert os.path.isfile(outvcf)

    tables = list(VcfReader(outvcf, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert len(table.variants) == 9
    assert set(table.samples) == set(["HG00514", "NA19240"])
    assert not all(p is None for p in table.phases_of("HG00514"))
    assert not all(p is None for p in table.phases_of("NA19240"))
Ejemplo n.º 5
0
def test_polyphase_multithreaded(tmp_path):
    outvcf_st = tmp_path / "output_st.vcf"
    outvcf_mt = tmp_path / "output_mt.vcf"

    run_polyphase(
        phase_input_files=["tests/data/polyploid.chr22.42M.12k.bam"],
        variant_file="tests/data/polyploid.chr22.42M.12k.vcf",
        ploidy=4,
        ignore_read_groups=True,
        output=outvcf_st,
    )
    run_polyphase(
        phase_input_files=["tests/data/polyploid.chr22.42M.12k.bam"],
        variant_file="tests/data/polyploid.chr22.42M.12k.vcf",
        ploidy=4,
        ignore_read_groups=True,
        output=outvcf_mt,
        threads=4,
    )
    assert os.path.isfile(outvcf_st)
    assert os.path.isfile(outvcf_mt)

    tables_st = list(VcfReader(outvcf_st, phases=True))
    table_st = tables_st[0]
    tables_mt = list(VcfReader(outvcf_mt, phases=True))
    table_mt = tables_mt[0]

    assert table_st.chromosome == table_mt.chromosome
    assert table_st.samples == table_mt.samples
    assert all(
        [st == mt for (st, mt) in zip(table_st.genotypes, table_mt.genotypes)])
    assert all(
        [st == mt for (st, mt) in zip(table_st.phases, table_mt.phases)])
    assert all([
        st == mt for (st, mt) in zip(table_st.genotype_likelihoods,
                                     table_mt.genotype_likelihoods)
    ])
    assert all(
        [st == mt for (st, mt) in zip(table_st.variants, table_mt.variants)])