Esempio n. 1
0
def test_phase_three_individuals(algorithm, tmpdir):
    """Phase the three samples of ``trio.vcf`` without passing a pedigree.

    ``run_whatshap`` is invoked with the given ``algorithm``; afterwards the
    output VCF and the read list must exist, the VCF must contain a single
    table for chromosome "1" with five variants, and every sample must carry
    the expected phase entries.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    readlist_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio.vcf",
        read_list_filename=readlist_path,
        output=vcf_path,
        algorithm=algorithm,
    )
    for written in (vcf_path, readlist_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    # Two distinct blocks are expected: one for HG003 and one for HG004.
    block_a = VariantCallPhase(60906167, (0, 1), None)
    block_b = VariantCallPhase(60907394, (0, 1), None)
    expected = {
        "HG004": [None, block_b, block_b, block_b, None],
        "HG003": [block_a, None, block_a, None, None],
        "HG002": [None] * 5,
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 2
0
def test_phase_trio_use_ped_samples():
	"""Check phasing of ``ped_samples.vcf`` for both ``use_ped_samples`` settings.

	For each setting, ``run_whatshap`` is run with ``trio.ped`` and
	``trio.map`` and the output VCF is read back.  The expectations for
	HG004/HG003/HG002 are the same for both settings; the 'orphan' sample is
	expected to be unphased when ``use_ped_samples`` is true and to carry its
	own block otherwise.
	"""
	for ped_samples in [True, False]:
		# Use a fresh directory per setting.  With one shared directory the
		# files written by the first run would still be present during the
		# second run, so the isfile checks below could never fail there.
		with TemporaryDirectory() as tempdir:
			outvcf = tempdir + '/output_ped_samples.vcf'
			outreadlist = tempdir + '/readlist.tsv'
			run_whatshap(
				phase_input_files=[ped_samples_bamfile],
				variant_file='tests/data/ped_samples.vcf',
				read_list_filename=outreadlist,
				output=outvcf,
				ped='tests/data/trio.ped',
				genmap='tests/data/trio.map',
				use_ped_samples=ped_samples,
			)
			assert os.path.isfile(outvcf)
			assert os.path.isfile(outreadlist)

			tables = list(VcfReader(outvcf, phases=True))
			assert len(tables) == 1
			table = tables[0]
			assert table.chromosome == '1'
			assert len(table.variants) == 5
			assert table.samples == ['HG004', 'HG003', 'HG002', 'orphan']

			phase0 = VariantCallPhase(60906167, 0, None)
			phase1 = VariantCallPhase(60907394, 0, None)
			assert_phasing(table.phases_of('HG004'), [phase0, phase0, phase0, phase0, phase0])
			assert_phasing(table.phases_of('HG003'), [phase0, None, phase0, phase0, phase0])
			assert_phasing(table.phases_of('HG002'), [None, phase0, None, None, None])

			if ped_samples:
				assert_phasing(table.phases_of('orphan'), [None, None, None, None, None])
			else:
				assert_phasing(table.phases_of('orphan'), [None, phase1, phase1, phase1, None])
Esempio n. 3
0
def test_phase_specific_chromosome():
	"""Restrict phasing to one chromosome of a two-chromosome VCF.

	For each of the chromosomes '1' and '2', ``run_whatshap`` is limited to
	that chromosome through ``chromosomes``.  The output must still contain
	two tables; only the table of the requested chromosome is expected to
	carry phase entries, the other one must be entirely unphased.
	"""
	for requested_chromosome in ['1', '2']:
		with TemporaryDirectory() as workdir:
			vcf_path = workdir + '/output.vcf'
			run_whatshap(
				phase_input_files=[trio_bamfile],
				variant_file='tests/data/trio-two-chromosomes.vcf',
				output=vcf_path,
				ped='tests/data/trio.ped',
				genmap='tests/data/trio.map',
				chromosomes=[requested_chromosome],
			)
			assert os.path.isfile(vcf_path)

			tables = list(VcfReader(vcf_path, phases=True))
			assert len(tables) == 2
			for table in tables:
				assert len(table.variants) == 5
				assert table.samples == ['HG004', 'HG003', 'HG002']

				was_requested = table.chromosome == requested_chromosome
				if was_requested and requested_chromosome == '1':
					first = VariantCallPhase(60906167, 0, None)
					expected = {
						'HG004': [first] * 5,
						'HG003': [first, None, first, first, first],
						'HG002': [None, first, None, None, None],
					}
				elif was_requested and requested_chromosome == '2':
					first = VariantCallPhase(60906167, 0, None)
					second = VariantCallPhase(60906167, 1, None)
					expected = {
						'HG004': [first, None, None, None, second],
						'HG003': [first, None, None, None, None],
						'HG002': [None, None, None, None, first],
					}
				else:
					# Table of the chromosome that was not requested.
					expected = {
						'HG004': [None] * 5,
						'HG003': [None] * 5,
						'HG002': [None] * 5,
					}
				for sample, phases in expected.items():
					assert_phasing(table.phases_of(sample), phases)
Esempio n. 4
0
def test_phase_trio_dont_merge_blocks(tmpdir):
    """Phase ``trio-merged-blocks.vcf`` with ``genetic_haplotyping=False``.

    The single resulting table (chromosome "1", eight variants) is expected
    to hold two blocks for HG004 and one block each for HG003 and HG002; the
    per-variant phase entries of all three samples are checked as well.
    """
    vcf_path = str(tmpdir.join("output-merged-blocks.vcf"))
    run_whatshap(
        phase_input_files=[trio_merged_bamfile],
        variant_file="tests/data/trio-merged-blocks.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        genetic_haplotyping=False,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 8
    assert table.samples == ["HG002", "HG003", "HG004"]
    for sample, block_count in (("HG004", 2), ("HG003", 1), ("HG002", 1)):
        assert table.num_of_blocks_of(sample) == block_count

    early = VariantCallPhase(752566, (1, 0), None)
    late_01 = VariantCallPhase(853954, (0, 1), None)
    late_10 = VariantCallPhase(853954, (1, 0), None)
    expected = {
        "HG004": [early] * 3 + [None] + [late_10] * 4,
        "HG003": [None] * 4 + [late_01] * 3 + [late_10],
        "HG002": [None] * 7 + [late_10],
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 5
0
def test_phase_trio_paired_end_reads(tmp_path):
    """Phase the mother/father/child trio from ``paired_end.sorted.vcf``.

    Expects one table for chromosome "1" with three variants, one block for
    mother and child, no block for the father, and the listed phase entries.
    """
    vcf_path = tmp_path / "output-paired_end.vcf"
    run_whatshap(
        phase_input_files=[trio_paired_end_bamfile],
        variant_file="tests/data/paired_end.sorted.vcf",
        output=vcf_path,
        ped="tests/data/trio_paired_end.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 3
    assert table.samples == ["mother", "father", "child"]
    for sample, block_count in (("mother", 1), ("father", 0), ("child", 1)):
        assert table.num_of_blocks_of(sample) == block_count

    phase_01 = VariantCallPhase(80050, (0, 1), None)
    phase_10 = VariantCallPhase(80050, (1, 0), None)
    expected = [
        ("mother", [phase_10, phase_10, phase_01]),
        ("father", [None, None, None]),
        ("child", [None, None, phase_10]),
    ]
    for sample, phases in expected:
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 6
0
def test_phase_three_individuals(algorithm):
	"""Phase the three samples of ``trio.vcf`` without passing a pedigree.

	``run_whatshap`` is invoked with the given ``algorithm`` inside a
	temporary directory.  The output VCF and the read list must exist, the
	VCF must hold a single table for chromosome '1' with five variants, and
	every sample must carry the expected phase entries.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output.vcf'
		readlist_path = workdir + '/readlist.tsv'
		run_whatshap(
			phase_input_files=[trio_bamfile],
			variant_file='tests/data/trio.vcf',
			read_list_filename=readlist_path,
			output=vcf_path,
			algorithm=algorithm,
		)
		for written in (vcf_path, readlist_path):
			assert os.path.isfile(written)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 5
		assert table.samples == ['HG004', 'HG003', 'HG002']

		block_a = VariantCallPhase(60906167, 0, None)
		block_b = VariantCallPhase(60907394, 0, None)
		expected = {
			'HG004': [None, block_b, block_b, block_b, None],
			'HG003': [block_a, None, block_a, None, None],
			'HG002': [None] * 5,
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 7
0
def test_phase_trio_distrust_genotypes(tmpdir):
    """Phase ``trio_genotype_likelihoods.vcf`` with ``distrust_genotypes=True``.

    The output VCF and read list must be written; the single table for
    chromosome "1" must have five variants and all phased entries of the
    three samples are expected to share one block.
    """
    vcf_path = str(tmpdir.join("output_gl.vcf"))
    readlist_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio_genotype_likelihoods.vcf",
        read_list_filename=readlist_path,
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        distrust_genotypes=True,
    )
    for written in (vcf_path, readlist_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    block = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [None, block, block, block, None],
        "HG003": [block, None, block, block, block],
        "HG002": [block, None, block, block, block],
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 8
0
def test_phase_quartet_recombination_breakpoints(expect_recombination,
                                                 parameters, tmp_path):
    """Phase the quartet and check the reported recombination breakpoints.

    Args:
        expect_recombination: whether the run is expected to report
            recombination events (changes the expected last phase entry of
            HG003 and the expected content of the recombination list).
        parameters: extra keyword arguments forwarded to ``run_whatshap``.
        tmp_path: directory in which the output files are created (joined
            with ``/``).
    """
    outvcf = tmp_path / "output-recombination_breaks.vcf"
    outlist = tmp_path / "output.recomb"
    run_whatshap(
        phase_input_files=[recombination_breaks_bamfile],
        variant_file="tests/data/quartet.vcf.gz",
        output=outvcf,
        ped="tests/data/recombination_breaks.ped",
        recombination_list_filename=outlist,
        **parameters,
    )
    assert os.path.isfile(outvcf)

    tables = list(VcfReader(outvcf, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 4
    assert table.samples == ["HG002", "HG005", "HG003", "HG004"]
    assert table.num_of_blocks_of("HG002") == 0
    assert table.num_of_blocks_of("HG005") == 0
    assert table.num_of_blocks_of("HG003") == 1
    assert table.num_of_blocks_of("HG004") == 0

    phase0 = VariantCallPhase(68735304, (0, 1), None)
    phase1 = VariantCallPhase(68735304, (1, 0), None)

    assert_phasing(table.phases_of("HG002"), [None, None, None, None])
    assert_phasing(table.phases_of("HG005"), [None, None, None, None])
    if expect_recombination:
        assert_phasing(table.phases_of("HG003"),
                       [phase0, phase0, None, phase1])
    else:
        assert_phasing(table.phases_of("HG003"),
                       [phase0, phase0, None, phase0])
    assert_phasing(table.phases_of("HG004"), [None, None, None, None])

    # Read the recombination list through a context manager so the file
    # handle is closed deterministically.
    with open(outlist) as recomb_file:
        lines = recomb_file.readlines()
    if expect_recombination:
        # First line is skipped here; one line per child follows.
        assert len(lines) == 3
        assert lines[1] == "HG002 1 68735433 68738308 0 1 0 0 3\n"
        assert lines[2] == "HG005 1 68735433 68738308 0 1 0 0 3\n"
    else:
        assert len(lines) == 1
Esempio n. 9
0
def test_read_tetraploid_phased():
    """Read ``polyploid.chr22.phased.vcf`` and compare its phase entries.

    The file must yield one table for "chr22" with a single sample and eight
    variants.  Both the phases read and the expected ones are printed before
    they are compared.
    """
    tables = list(VcfReader("tests/data/polyploid.chr22.phased.vcf", phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert table.samples == ["HG00514_NA19240"]
    assert len(table.variants) == 8

    def in_block(block_id, phase):
        # All expected entries have no quality value.
        return VariantCallPhase(block_id=block_id, phase=phase, quality=None)

    expected_phase = [
        in_block(20000000, (1, 0, 1, 1)),
        in_block(20000000, (1, 0, 1, 0)),
        None,
        in_block(20000000, (1, 0, 1, 1)),
        in_block(20001000, (1, 0, 1, 1)),
        in_block(20001000, (0, 0, 0, 1)),
        in_block(20001000, (0, 0, 0, 1)),
        in_block(20001000, (0, 0, 0, 1)),
    ]
    for heading, entries in (("Got:", table.phases[0]), ("Exp:", expected_phase)):
        print(heading)
        for entry in entries:
            print(entry)
    assert list(table.phases[0]) == expected_phase
Esempio n. 10
0
def test_phase_quartet_recombination_breakpoints():
	"""Phase the quartet under several settings and check recombination output.

	Each parameter set pairs a flag (whether recombination events are
	expected) with extra keyword arguments for ``run_whatshap``.  For every
	set the phased VCF and the recombination list are checked.
	"""
	parameter_sets = [
		(False, {'genmap':'tests/data/recombination_breaks.map'}),
		(True, {'recombrate':1000000}),
		(False, {'recombrate':.0000001})
	]

	for expect_recombination, parameters in parameter_sets:
		with TemporaryDirectory() as tempdir:
			outvcf = tempdir + '/output-recombination_breaks.vcf'
			outlist = tempdir + '/output.recomb'
			run_whatshap(phase_input_files=[recombination_breaks_bamfile], variant_file='tests/data/quartet.vcf.gz', output=outvcf,
					ped='tests/data/recombination_breaks.ped', recombination_list_filename = outlist, **parameters)
			assert os.path.isfile(outvcf)

			tables = list(VcfReader(outvcf, phases=True))
			assert len(tables) == 1
			table = tables[0]
			assert table.chromosome == '1'
			assert len(table.variants) == 4
			assert table.samples == ['HG002', 'HG005', 'HG003', 'HG004']
			assert table.num_of_blocks_of('HG002') == 0
			assert table.num_of_blocks_of('HG005') == 0
			assert table.num_of_blocks_of('HG003') == 1
			assert table.num_of_blocks_of('HG004') == 0

			phase0 = VariantCallPhase(68735304, 0, None)
			phase1 = VariantCallPhase(68735304, 1, None)

			assert_phasing(table.phases_of('HG002'), [None, None, None, None])
			assert_phasing(table.phases_of('HG005'), [None, None, None, None])
			if expect_recombination:
				assert_phasing(table.phases_of('HG003'), [phase0, phase0, None, phase1])
			else:
				assert_phasing(table.phases_of('HG003'), [phase0, phase0, None, phase0])
			assert_phasing(table.phases_of('HG004'), [None, None, None, None])

			# Close the recombination list before the temporary directory
			# is cleaned up instead of leaving the handle to the garbage
			# collector.
			with open(outlist) as recomb_file:
				lines = recomb_file.readlines()
			if expect_recombination:
				assert len(lines) == 3
				assert lines[1]=='HG002 1 68735433 68738308 0 0 0 1 3\n'
				assert lines[2]=='HG005 1 68735433 68738308 0 0 0 1 3\n'
			else:
				assert len(lines) == 1
Esempio n. 11
0
def test_phase_specific_chromosome(chromosome, tmp_path):
    """Restrict phasing of a two-chromosome VCF to ``chromosome``.

    The output must still contain two tables; only the table whose
    chromosome equals the requested one ("1" or "2") is expected to carry
    phase entries, any other table must be entirely unphased.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-two-chromosomes.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        chromosomes=[chromosome],
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 2
    for table in tables:
        assert len(table.variants) == 5
        assert table.samples == ["HG004", "HG003", "HG002"]

        was_requested = table.chromosome == chromosome
        if was_requested and chromosome == "1":
            first = VariantCallPhase(60906167, (0, 1), None)
            expected = {
                "HG004": [first] * 5,
                "HG003": [first, None, first, first, first],
                "HG002": [None, first, None, None, None],
            }
        elif was_requested and chromosome == "2":
            first = VariantCallPhase(60906167, (0, 1), None)
            flipped = VariantCallPhase(60906167, (1, 0), None)
            expected = {
                "HG004": [first, None, None, None, flipped],
                "HG003": [first, None, None, None, None],
                "HG002": [None, None, None, None, first],
            }
        else:
            # Table of a chromosome that was not requested.
            expected = {
                "HG004": [None] * 5,
                "HG003": [None] * 5,
                "HG002": [None] * 5,
            }
        for sample, phases in expected.items():
            assert_phasing(table.phases_of(sample), phases)
Esempio n. 12
0
def test_genetic_haplotyping(tmp_path):
    """Phase ``genetic-haplotyping.vcf`` from the pedigree alone (no reads).

    ``run_whatshap`` gets an empty ``phase_input_files`` list.  The phased
    VCF is checked per sample, and the recombination list is expected to
    contain a header line plus exactly one record, for sampleC.
    """
    outvcf = tmp_path / "output.vcf"
    outrecomb = tmp_path / "output.recomb"
    run_whatshap(
        variant_file="tests/data/genetic-haplotyping.vcf",
        phase_input_files=[],
        ped="tests/data/genetic-haplotyping.ped",
        output=outvcf,
        recombination_list_filename=outrecomb,
    )
    tables = list(VcfReader(outvcf, phases=True))

    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 3
    assert table.samples == [
        "sampleA", "sampleB", "sampleC", "sampleD", "sampleE"
    ]
    assert table.num_of_blocks_of("sampleA") == 1
    assert table.num_of_blocks_of("sampleB") == 1
    assert table.num_of_blocks_of("sampleC") == 0
    assert table.num_of_blocks_of("sampleD") == 1
    assert table.num_of_blocks_of("sampleE") == 1

    phase0 = VariantCallPhase(10327, (0, 1), None)
    phase1 = VariantCallPhase(10327, (1, 0), None)

    assert_phasing(table.phases_of("sampleA"), [phase0, phase0, phase1])
    assert_phasing(table.phases_of("sampleB"), [phase0, None, None])
    assert_phasing(table.phases_of("sampleC"), [None, None, None])
    assert_phasing(table.phases_of("sampleD"), [phase0, None, phase1])
    assert_phasing(table.phases_of("sampleE"), [phase0, phase0, None])

    # Use a context manager so the recombination list is closed right away.
    with open(outrecomb) as recomb_file:
        lines = [line.split() for line in recomb_file]
    assert len(lines) == 2
    # The first line names the columns; strip its leading "#" marker.
    Fields = namedtuple("Fields", [f.strip("#\n") for f in lines[0]])
    recomb = Fields(*lines[1])
    print(recomb)
    assert recomb.child_id == "sampleC"
    assert recomb.chromosome == "1"
    assert recomb.position1 == "31295"
    assert recomb.position2 == "102596"
Esempio n. 13
0
def test_indel_phasing(algorithm):
	"""Phase ``indels.vcf`` with ``indels=True`` using the given ``algorithm``.

	The output is read back with indels enabled and must contain a single
	table for 'random0' with four variants whose phase entries alternate
	within one block.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output.vcf'
		run_whatshap(
			phase_input_files=[indels_bamfile],
			indels=True,
			variant_file='tests/data/indels.vcf',
			reference='tests/data/random0.fasta',
			output=vcf_path,
			algorithm=algorithm,
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, indels=True, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == 'random0'
		assert len(table.variants) == 4
		assert table.samples == ['sample1']

		even = VariantCallPhase(41, 0, None)
		odd = VariantCallPhase(41, 1, None)
		expected = [even, odd, even, odd]
		assert_phasing(table.phases_of('sample1'), expected)
Esempio n. 14
0
def test_read_phased_vcf():
    """Read two pre-phased VCFs and compare tables, genotypes and phases.

    Both input files are expected to yield the same content: table "chrA"
    with four variants and table "chrB" with two, identical genotypes, phase
    entries on "chrA" for both samples, and no phase entries on "chrB".
    """
    to_gt = canonic_index_list_to_biallelic_gt_list
    sample_names = ["sample1", "sample2"]
    for filename in ("tests/data/phased-via-HP.vcf", "tests/data/phased-via-PS.vcf"):
        print("Testing", filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        for table, name, variant_count in ((table_a, "chrA", 4), (table_b, "chrB", 2)):
            assert table.chromosome == name
            assert len(table.variants) == variant_count
            assert table.samples == ["sample1", "sample2"]

        # Genotypes are checked both by sample position and by sample name.
        genotype_indices = (
            (table_a, [[1, 2, 1, 1], [1, 1, 1, 1]]),
            (table_b, [[0, 1], [1, 2]]),
        )
        for table, per_sample in genotype_indices:
            assert len(table.genotypes) == 2
            for position, indices in enumerate(per_sample):
                assert list(table.genotypes[position]) == to_gt(indices)
            for sample, indices in zip(sample_names, per_sample):
                assert list(table.genotypes_of(sample)) == to_gt(indices)

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phases_a = [
            [
                None,
                None,
                VariantCallPhase(block_id=300, phase=(1, 0), quality=23),
                VariantCallPhase(block_id=300, phase=(0, 1), quality=42),
            ],
            [
                VariantCallPhase(block_id=100, phase=(0, 1), quality=10),
                VariantCallPhase(block_id=100, phase=(1, 0), quality=20),
                VariantCallPhase(block_id=300, phase=(0, 1), quality=30),
                VariantCallPhase(block_id=300, phase=(0, 1), quality=None),
            ],
        ]
        for position, expected in enumerate(expected_phases_a):
            assert list(table_a.phases[position]) == expected
        for sample, expected in zip(sample_names, expected_phases_a):
            assert list(table_a.phases_of(sample)) == expected

        assert len(table_b.phases) == 2
        for position in range(2):
            assert list(table_b.phases[position]) == [None, None]
        for sample in sample_names:
            assert list(table_b.phases_of(sample)) == [None, None]
Esempio n. 15
0
def test_phase_trio_merged_blocks():
	"""Phase ``trio-merged-blocks.vcf`` with the trio pedigree and map.

	The single table (chromosome '1', eight variants) is expected to hold
	exactly one block per sample, and all phased entries share one block id.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output-merged-blocks.vcf'
		run_whatshap(
			phase_input_files=[trio_merged_bamfile],
			variant_file='tests/data/trio-merged-blocks.vcf',
			output=vcf_path,
			ped='tests/data/trio.ped',
			genmap='tests/data/trio.map',
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 8
		assert table.samples == ['HG002', 'HG003', 'HG004']
		for sample in ('HG004', 'HG003', 'HG002'):
			assert table.num_of_blocks_of(sample) == 1

		zero = VariantCallPhase(752566, 0, None)
		one = VariantCallPhase(752566, 1, None)
		expected = {
			'HG004': [one] * 3 + [None] + [one] * 4,
			'HG003': [None] * 4 + [zero] * 3 + [one],
			'HG002': [None] * 7 + [one],
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 16
0
def test_genetic_haplotyping():
	"""Phase ``genetic-haplotyping.vcf`` from the pedigree alone (no reads).

	``run_whatshap`` gets an empty ``phase_input_files`` list.  The phased
	VCF is checked per sample, and the recombination list is expected to
	contain a header line plus exactly one record, for sampleC.
	"""
	with TemporaryDirectory() as tempdir:
		outvcf = tempdir + '/output.vcf'
		outrecomb = tempdir + '/output.recomb'
		run_whatshap(variant_file='tests/data/genetic-haplotyping.vcf', phase_input_files=[],
			ped='tests/data/genetic-haplotyping.ped', output=outvcf,
			recombination_list_filename=outrecomb)
		tables = list(VcfReader(outvcf, phases=True))

		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 3
		assert table.samples == ['sampleA', 'sampleB', 'sampleC', 'sampleD', 'sampleE']
		assert table.num_of_blocks_of('sampleA') == 1
		assert table.num_of_blocks_of('sampleB') == 1
		assert table.num_of_blocks_of('sampleC') == 0
		assert table.num_of_blocks_of('sampleD') == 1
		assert table.num_of_blocks_of('sampleE') == 1

		phase0 = VariantCallPhase(10327, 0, None)
		phase1 = VariantCallPhase(10327, 1, None)

		assert_phasing(table.phases_of('sampleA'), [phase0, phase0, phase1])
		assert_phasing(table.phases_of('sampleB'), [phase0, None, None])
		assert_phasing(table.phases_of('sampleC'), [None, None, None])
		assert_phasing(table.phases_of('sampleD'), [phase0, None, phase1])
		assert_phasing(table.phases_of('sampleE'), [phase0, phase0, None])

		# Close the recombination list before the temporary directory is
		# removed instead of leaving the handle to the garbage collector.
		with open(outrecomb) as recomb_file:
			lines = [line.split() for line in recomb_file]
		assert len(lines) == 2
		# The first line names the columns; strip its leading '#' marker.
		Fields = namedtuple('Fields', [f.strip('#\n') for f in lines[0]])
		recomb = Fields(*lines[1])
		print(recomb)
		assert recomb.child_id == 'sampleC'
		assert recomb.chromosome == '1'
		assert recomb.position1 == '31295'
		assert recomb.position2 == '102596'
Esempio n. 17
0
def test_indel_phasing(algorithm, tmp_path):
    """Phase ``indels.vcf`` with ``indels=True`` using the given ``algorithm``.

    The output is read back with indels enabled and must contain a single
    table for "random0" with four variants whose phase entries alternate
    within one block.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[indels_bamfile],
        indels=True,
        variant_file="tests/data/indels.vcf",
        reference="tests/data/random0.fasta",
        output=vcf_path,
        algorithm=algorithm,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, indels=True, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "random0"
    assert len(table.variants) == 4
    assert table.samples == ["sample1"]

    straight = VariantCallPhase(41, (0, 1), None)
    flipped = VariantCallPhase(41, (1, 0), None)
    expected = [straight, flipped, straight, flipped]
    assert_phasing(table.phases_of("sample1"), expected)
Esempio n. 18
0
def test_phase_trio_paired_end_reads():
	"""Phase the mother/father/child trio from ``paired_end.sorted.vcf``.

	Expects one table for chromosome '1' with three variants, one block for
	mother and child, no block for the father, and the listed phase entries.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output-paired_end.vcf'
		run_whatshap(
			phase_input_files=[trio_paired_end_bamfile],
			variant_file='tests/data/paired_end.sorted.vcf',
			output=vcf_path,
			ped='tests/data/trio_paired_end.ped',
			genmap='tests/data/trio.map',
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 3
		assert table.samples == ['mother', 'father', 'child']
		for sample, block_count in (('mother', 1), ('father', 0), ('child', 1)):
			assert table.num_of_blocks_of(sample) == block_count

		zero = VariantCallPhase(80050, 0, None)
		one = VariantCallPhase(80050, 1, None)
		expected = [
			('mother', [one, one, zero]),
			('father', [None, None, None]),
			('child', [None, None, one]),
		]
		for sample, phases in expected:
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 19
0
def test_phase_trio_use_ped_samples(ped_samples, tmpdir):
    """Check phasing of ``ped_samples.vcf`` for one ``use_ped_samples`` value.

    The expectations for HG004/HG003/HG002 do not depend on ``ped_samples``.
    The "orphan" sample is expected to be unphased when ``ped_samples`` is
    truthy and to carry its own block otherwise.
    """
    vcf_path = str(tmpdir.join("output_ped_samples.vcf"))
    readlist_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[ped_samples_bamfile],
        variant_file="tests/data/ped_samples.vcf",
        read_list_filename=readlist_path,
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        use_ped_samples=ped_samples,
    )
    for written in (vcf_path, readlist_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002", "orphan"]

    trio_block = VariantCallPhase(60906167, (0, 1), None)
    orphan_block = VariantCallPhase(60907394, (0, 1), None)
    trio_expected = {
        "HG004": [trio_block] * 5,
        "HG003": [trio_block, None, trio_block, trio_block, trio_block],
        "HG002": [None, trio_block, None, None, None],
    }
    for sample, phases in trio_expected.items():
        assert_phasing(table.phases_of(sample), phases)

    orphan_expected = (
        [None] * 5
        if ped_samples
        else [None, orphan_block, orphan_block, orphan_block, None]
    )
    assert_phasing(table.phases_of("orphan"), orphan_expected)
Esempio n. 20
0
def test_distrust_genotypes_assertion(tmp_path):
    """Phase ``test_dist_geno.vcf`` with ``indels=False`` and check NA12878.

    NOTE(review): judging by the name this is presumably a regression test
    for an assertion failure in the phasing code; confirm against history.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        indels=False,
        phase_input_files=[dist_geno_bamfile],
        variant_file="tests/data/test_dist_geno.vcf",
        output=vcf_path,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr1"

    block = VariantCallPhase(23824647, (0, 1), None)
    expected = [None, block, None, block]
    assert_phasing(table.phases_of("NA12878"), expected)
Esempio n. 21
0
def test_phase_one_of_three_individuals():
	"""Phase only HG003 out of the three samples in ``trio.vcf``.

	``run_whatshap`` is limited to HG003 through ``samples``; all three
	samples must still be present in the output, but only HG003 is expected
	to carry phase entries.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output.vcf'
		run_whatshap(
			phase_input_files=[trio_bamfile],
			variant_file='tests/data/trio.vcf',
			output=vcf_path,
			samples=['HG003'],
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 5
		assert table.samples == ['HG004', 'HG003', 'HG002']

		block = VariantCallPhase(60906167, 0, None)
		expected = {
			'HG004': [None] * 5,
			'HG003': [block, None, block, None, None],
			'HG002': [None] * 5,
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 22
0
def test_read_phased_vcf():
    """Read two pre-phased VCFs and compare tables, genotypes and phases.

    Both input files are expected to yield the same content: table 'chrA'
    with four variants and table 'chrB' with two, identical genotypes, phase
    entries on 'chrA' for both samples, and no phase entries on 'chrB'.
    """
    sample_names = ['sample1', 'sample2']
    for filename in ('tests/data/phased-via-HP.vcf', 'tests/data/phased-via-PS.vcf'):
        print('Testing', filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        for table, name, variant_count in ((table_a, 'chrA', 4), (table_b, 'chrB', 2)):
            assert table.chromosome == name
            assert len(table.variants) == variant_count
            assert table.samples == ['sample1', 'sample2']

        # Genotypes are checked both by sample position and by sample name.
        expected_genotypes = (
            (table_a, [[1, 2, 1, 1], [1, 1, 1, 1]]),
            (table_b, [[0, 1], [1, 2]]),
        )
        for table, per_sample in expected_genotypes:
            assert len(table.genotypes) == 2
            for position, genotypes in enumerate(per_sample):
                assert list(table.genotypes[position]) == genotypes
            for sample, genotypes in zip(sample_names, per_sample):
                assert list(table.genotypes_of(sample)) == genotypes

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phases_a = [
            [
                None,
                None,
                VariantCallPhase(block_id=300, phase=1, quality=23),
                VariantCallPhase(block_id=300, phase=0, quality=42),
            ],
            [
                VariantCallPhase(block_id=100, phase=0, quality=10),
                VariantCallPhase(block_id=100, phase=1, quality=20),
                VariantCallPhase(block_id=300, phase=0, quality=30),
                VariantCallPhase(block_id=300, phase=0, quality=None),
            ],
        ]
        for position, expected in enumerate(expected_phases_a):
            assert list(table_a.phases[position]) == expected
        for sample, expected in zip(sample_names, expected_phases_a):
            assert list(table_a.phases_of(sample)) == expected

        assert len(table_b.phases) == 2
        for position in range(2):
            assert list(table_b.phases[position]) == [None, None]
        for sample in sample_names:
            assert list(table_b.phases_of(sample)) == [None, None]
Esempio n. 23
0
def test_genetic_phasing_symbolic_alt():
	"""Phase ``trio-symbolic-alt.vcf`` from the pedigree alone, indels enabled.

	``run_whatshap`` gets an empty ``phase_input_files`` list.  The output is
	read back with indels enabled and must contain one table for chromosome
	'1' with five variants, all phased entries sharing one block.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output.vcf'
		run_whatshap(
			phase_input_files=[],
			variant_file='tests/data/trio-symbolic-alt.vcf',
			output=vcf_path,
			ped='tests/data/trio.ped',
			indels=True,
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, phases=True, indels=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 5
		assert table.samples == ['HG004', 'HG003', 'HG002']

		block = VariantCallPhase(60906167, 0, None)
		expected = {
			'HG004': [block] * 5,
			'HG003': [block, None, block, block, block],
			'HG002': [None, block, None, None, None],
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 24
0
def test_phase_mendelian_conflict():
	"""Phase ``trio-mendelian-conflict.vcf`` with the trio pedigree and map.

	Expects one table for chromosome '1' with five variants.  The second
	variant is expected to stay unphased in every sample and HG002 is
	expected to be entirely unphased.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output.vcf'
		run_whatshap(
			phase_input_files=[trio_bamfile],
			variant_file='tests/data/trio-mendelian-conflict.vcf',
			output=vcf_path,
			ped='tests/data/trio.ped',
			genmap='tests/data/trio.map',
		)
		assert os.path.isfile(vcf_path)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 5
		assert table.samples == ['HG004', 'HG003', 'HG002']

		block = VariantCallPhase(60906167, 0, None)
		expected = {
			'HG004': [block, None, block, block, block],
			'HG003': [block, None, block, block, block],
			'HG002': [None] * 5,
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 25
0
def test_phase_trio_distrust_genotypes():
	"""Phase ``trio_genotype_likelihoods.vcf`` with ``distrust_genotypes=True``.

	The output VCF and read list must be written; the single table for
	chromosome '1' must have five variants and all phased entries of the
	three samples are expected to share one block.
	"""
	with TemporaryDirectory() as workdir:
		vcf_path = workdir + '/output_gl.vcf'
		readlist_path = workdir + '/readlist.tsv'
		run_whatshap(
			phase_input_files=[trio_bamfile],
			variant_file='tests/data/trio_genotype_likelihoods.vcf',
			read_list_filename=readlist_path,
			output=vcf_path,
			ped='tests/data/trio.ped',
			genmap='tests/data/trio.map',
			distrust_genotypes=True,
		)
		for written in (vcf_path, readlist_path):
			assert os.path.isfile(written)

		tables = list(VcfReader(vcf_path, phases=True))
		assert len(tables) == 1
		table = tables[0]
		assert table.chromosome == '1'
		assert len(table.variants) == 5
		assert table.samples == ['HG004', 'HG003', 'HG002']

		block = VariantCallPhase(60906167, 0, None)
		expected = {
			'HG004': [None, block, block, block, None],
			'HG003': [block, None, block, block, block],
			'HG002': [block, None, block, block, block],
		}
		for sample, phases in expected.items():
			assert_phasing(table.phases_of(sample), phases)
Esempio n. 26
0
def test_phase_missing_genotypes(tmp_path):
    """Phase ``trio-missing-genotypes.vcf`` with the trio pedigree and map.

    Expects one table for chromosome "1" with five variants; the third and
    fifth variants are expected to stay unphased in every sample.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-missing-genotypes.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    block = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [block, block, None, block, None],
        "HG003": [block, None, None, block, None],
        "HG002": [None, block, None, None, None],
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 27
0
def test_phase_mendelian_conflict(tmpdir):
    """Phase ``trio-mendelian-conflict.vcf`` with the trio pedigree and map.

    Expects one table for chromosome "1" with five variants.  The second
    variant is expected to stay unphased in every sample and HG002 is
    expected to be entirely unphased.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-mendelian-conflict.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    block = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [block, None, block, block, block],
        "HG003": [block, None, block, block, block],
        "HG002": [None] * 5,
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)
Esempio n. 28
0
def test_genetic_phasing_symbolic_alt(tmpdir):
    """Phase ``trio-symbolic-alt.vcf`` from the pedigree alone, indels enabled.

    ``run_whatshap`` gets an empty ``phase_input_files`` list.  The output is
    read back with indels enabled and must contain one table for chromosome
    "1" with five variants, all phased entries sharing one block.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    run_whatshap(
        phase_input_files=[],
        variant_file="tests/data/trio-symbolic-alt.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        indels=True,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True, indels=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    block = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [block] * 5,
        "HG003": [block, None, block, block, block],
        "HG002": [None, block, None, None, None],
    }
    for sample, phases in expected.items():
        assert_phasing(table.phases_of(sample), phases)