def test_phase_three_individuals(algorithm, tmpdir):
    """Run whatshap on the trio data without a pedigree and check the phasing.

    The run must write both the output VCF and the read list; the VCF is then
    re-read and the phasing of each of the three samples is compared against
    the expected block assignments.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    read_list_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio.vcf",
        read_list_filename=read_list_path,
        output=vcf_path,
        algorithm=algorithm,
    )
    for written in (vcf_path, read_list_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    block_a = VariantCallPhase(60906167, (0, 1), None)
    block_b = VariantCallPhase(60907394, (0, 1), None)
    expected = {
        "HG004": [None, block_b, block_b, block_b, None],
        "HG003": [block_a, None, block_a, None, None],
        "HG002": [None] * 5,
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_phase_trio_use_ped_samples():
    """Phase the ped_samples data once with and once without ``use_ped_samples``.

    The three pedigree samples are expected to be phased identically in both
    runs. The additional sample ``orphan`` is expected to stay unphased when
    ``use_ped_samples`` is true and to be phased otherwise.
    """
    for ped_samples in [True, False]:
        # Use a fresh directory for every run. With a single shared directory,
        # the files left behind by the previous iteration would satisfy the
        # isfile() checks (and be parsed) even if this run wrote nothing.
        with TemporaryDirectory() as tempdir:
            outvcf = tempdir + '/output_ped_samples.vcf'
            outreadlist = tempdir + '/readlist.tsv'
            run_whatshap(
                phase_input_files=[ped_samples_bamfile],
                variant_file='tests/data/ped_samples.vcf',
                read_list_filename=outreadlist,
                output=outvcf,
                ped='tests/data/trio.ped',
                genmap='tests/data/trio.map',
                use_ped_samples=ped_samples,
            )
            assert os.path.isfile(outvcf)
            assert os.path.isfile(outreadlist)

            tables = list(VcfReader(outvcf, phases=True))
            assert len(tables) == 1
            table = tables[0]
            assert table.chromosome == '1'
            assert len(table.variants) == 5
            assert table.samples == ['HG004', 'HG003', 'HG002', 'orphan']

            phase0 = VariantCallPhase(60906167, 0, None)
            phase1 = VariantCallPhase(60907394, 0, None)
            assert_phasing(table.phases_of('HG004'), [phase0, phase0, phase0, phase0, phase0])
            assert_phasing(table.phases_of('HG003'), [phase0, None, phase0, phase0, phase0])
            assert_phasing(table.phases_of('HG002'), [None, phase0, None, None, None])
            if ped_samples:
                assert_phasing(table.phases_of('orphan'), [None, None, None, None, None])
            else:
                assert_phasing(table.phases_of('orphan'), [None, phase1, phase1, phase1, None])
def test_phase_specific_chromosome():
    """Restrict phasing to one chromosome at a time and check both output tables.

    For each requested chromosome the output must still contain two tables;
    only the table of the requested chromosome is expected to carry phasing,
    the other one must be completely unphased.
    """
    sample_names = ['HG004', 'HG003', 'HG002']
    for requested_chromosome in ['1', '2']:
        with TemporaryDirectory() as workdir:
            vcf_path = f'{workdir}/output.vcf'
            run_whatshap(
                phase_input_files=[trio_bamfile],
                variant_file='tests/data/trio-two-chromosomes.vcf',
                output=vcf_path,
                ped='tests/data/trio.ped',
                genmap='tests/data/trio.map',
                chromosomes=[requested_chromosome],
            )
            assert os.path.isfile(vcf_path)

            tables = list(VcfReader(vcf_path, phases=True))
            assert len(tables) == 2
            for table in tables:
                assert len(table.variants) == 5
                assert table.samples == ['HG004', 'HG003', 'HG002']

                is_requested = table.chromosome == requested_chromosome
                if is_requested and requested_chromosome == '1':
                    hap0 = VariantCallPhase(60906167, 0, None)
                    expected = {
                        'HG004': [hap0, hap0, hap0, hap0, hap0],
                        'HG003': [hap0, None, hap0, hap0, hap0],
                        'HG002': [None, hap0, None, None, None],
                    }
                elif is_requested and requested_chromosome == '2':
                    hap0 = VariantCallPhase(60906167, 0, None)
                    hap1 = VariantCallPhase(60906167, 1, None)
                    expected = {
                        'HG004': [hap0, None, None, None, hap1],
                        'HG003': [hap0, None, None, None, None],
                        'HG002': [None, None, None, None, hap0],
                    }
                else:
                    # Table of the chromosome that was not requested.
                    expected = {name: [None] * 5 for name in sample_names}

                for name, phasing in expected.items():
                    assert_phasing(table.phases_of(name), phasing)
def test_phase_trio_dont_merge_blocks(tmpdir):
    """Phase the merged-blocks trio with ``genetic_haplotyping=False``.

    Checks the number of phased blocks per sample (two for HG004, one for the
    others) and the exact per-variant phasing.
    """
    vcf_path = str(tmpdir.join("output-merged-blocks.vcf"))
    run_whatshap(
        phase_input_files=[trio_merged_bamfile],
        variant_file="tests/data/trio-merged-blocks.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        genetic_haplotyping=False,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 8
    assert table.samples == ["HG002", "HG003", "HG004"]

    for sample, block_count in (("HG004", 2), ("HG003", 1), ("HG002", 1)):
        assert table.num_of_blocks_of(sample) == block_count

    first_block = VariantCallPhase(752566, (1, 0), None)
    second_block_01 = VariantCallPhase(853954, (0, 1), None)
    second_block_10 = VariantCallPhase(853954, (1, 0), None)
    expected = {
        "HG004": [first_block] * 3 + [None] + [second_block_10] * 4,
        "HG003": [None] * 4 + [second_block_01] * 3 + [second_block_10],
        "HG002": [None] * 7 + [second_block_10],
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_phase_trio_paired_end_reads(tmp_path):
    """Phase the paired-end trio data and verify block counts and phasing."""
    vcf_path = tmp_path / "output-paired_end.vcf"
    run_whatshap(
        phase_input_files=[trio_paired_end_bamfile],
        variant_file="tests/data/paired_end.sorted.vcf",
        output=vcf_path,
        ped="tests/data/trio_paired_end.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 3
    assert table.samples == ["mother", "father", "child"]

    for sample, block_count in (("mother", 1), ("father", 0), ("child", 1)):
        assert table.num_of_blocks_of(sample) == block_count

    hap01 = VariantCallPhase(80050, (0, 1), None)
    hap10 = VariantCallPhase(80050, (1, 0), None)
    expected = {
        "mother": [hap10, hap10, hap01],
        "father": [None] * 3,
        "child": [None, None, hap10],
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_phase_three_individuals(algorithm):
    """Run whatshap on the trio data without a pedigree and check the phasing.

    Output VCF and read list are written into a temporary directory; both
    must exist afterwards and the VCF must contain the expected phasing.
    """
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output.vcf'
        read_list_path = f'{workdir}/readlist.tsv'
        run_whatshap(
            phase_input_files=[trio_bamfile],
            variant_file='tests/data/trio.vcf',
            read_list_filename=read_list_path,
            output=vcf_path,
            algorithm=algorithm,
        )
        for written in (vcf_path, read_list_path):
            assert os.path.isfile(written)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 5
        assert table.samples == ['HG004', 'HG003', 'HG002']

        block_a = VariantCallPhase(60906167, 0, None)
        block_b = VariantCallPhase(60907394, 0, None)
        expected = {
            'HG004': [None, block_b, block_b, block_b, None],
            'HG003': [block_a, None, block_a, None, None],
            'HG002': [None] * 5,
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_phase_trio_distrust_genotypes(tmpdir):
    """Phase the genotype-likelihood trio VCF with ``distrust_genotypes=True``.

    Both the output VCF and the read list must be written, and the phasing of
    all three samples must match the expected result.
    """
    vcf_path = str(tmpdir.join("output_gl.vcf"))
    read_list_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio_genotype_likelihoods.vcf",
        read_list_filename=read_list_path,
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        distrust_genotypes=True,
    )
    for written in (vcf_path, read_list_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    hap = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [None, hap, hap, hap, None],
        "HG003": [hap, None, hap, hap, hap],
        "HG002": [hap, None, hap, hap, hap],
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_phase_quartet_recombination_breakpoints(expect_recombination, parameters, tmp_path):
    """Phase the quartet data and check phasing plus the recombination list.

    Args:
        expect_recombination: Whether the run is expected to report
            recombination events. Selects the expected phasing of HG003 and
            the expected number of lines in the recombination list.
        parameters: Extra keyword arguments forwarded to ``run_whatshap``.
        tmp_path: Directory that receives the output files.
    """
    outvcf = tmp_path / "output-recombination_breaks.vcf"
    outlist = tmp_path / "output.recomb"
    run_whatshap(
        phase_input_files=[recombination_breaks_bamfile],
        variant_file="tests/data/quartet.vcf.gz",
        output=outvcf,
        ped="tests/data/recombination_breaks.ped",
        recombination_list_filename=outlist,
        **parameters,
    )
    assert os.path.isfile(outvcf)

    tables = list(VcfReader(outvcf, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 4
    assert table.samples == ["HG002", "HG005", "HG003", "HG004"]
    assert table.num_of_blocks_of("HG002") == 0
    assert table.num_of_blocks_of("HG005") == 0
    assert table.num_of_blocks_of("HG003") == 1
    assert table.num_of_blocks_of("HG004") == 0

    phase0 = VariantCallPhase(68735304, (0, 1), None)
    phase1 = VariantCallPhase(68735304, (1, 0), None)
    assert_phasing(table.phases_of("HG002"), [None, None, None, None])
    assert_phasing(table.phases_of("HG005"), [None, None, None, None])
    if expect_recombination:
        assert_phasing(table.phases_of("HG003"), [phase0, phase0, None, phase1])
    else:
        assert_phasing(table.phases_of("HG003"), [phase0, phase0, None, phase0])
    assert_phasing(table.phases_of("HG004"), [None, None, None, None])

    # Read through a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(outlist) as recomb_file:
        lines = recomb_file.readlines()
    if expect_recombination:
        # lines[0] is not checked; lines 1 and 2 hold one event per child.
        assert len(lines) == 3
        assert lines[1] == "HG002 1 68735433 68738308 0 1 0 0 3\n"
        assert lines[2] == "HG005 1 68735433 68738308 0 1 0 0 3\n"
    else:
        assert len(lines) == 1
def test_read_tetraploid_phased():
    """Read a phased VCF with four-element phase tuples and check every call.

    The observed and expected phases are printed before the final comparison
    so that a failing run shows both lists.
    """
    tables = list(VcfReader("tests/data/polyploid.chr22.phased.vcf", phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr22"
    assert table.samples == ["HG00514_NA19240"]
    assert len(table.variants) == 8

    # (block_id, phase) per variant; None marks a variant without phase.
    raw_expected = [
        (20000000, (1, 0, 1, 1)),
        (20000000, (1, 0, 1, 0)),
        None,
        (20000000, (1, 0, 1, 1)),
        (20001000, (1, 0, 1, 1)),
        (20001000, (0, 0, 0, 1)),
        (20001000, (0, 0, 0, 1)),
        (20001000, (0, 0, 0, 1)),
    ]
    expected_phase = [
        None
        if entry is None
        else VariantCallPhase(block_id=entry[0], phase=entry[1], quality=None)
        for entry in raw_expected
    ]

    print("Got:", *table.phases[0], sep="\n")
    print("Exp:", *expected_phase, sep="\n")
    assert list(table.phases[0]) == expected_phase
def test_phase_quartet_recombination_breakpoints():
    """Phase the quartet data under several recombination settings.

    Each parameter set pairs a flag (whether recombination events are
    expected) with the extra keyword arguments passed to ``run_whatshap``.
    The phasing of HG003 and the contents of the recombination list are
    checked against that flag.
    """
    parameter_sets = [
        (False, {'genmap': 'tests/data/recombination_breaks.map'}),
        (True, {'recombrate': 1000000}),
        (False, {'recombrate': .0000001}),
    ]
    for expect_recombination, parameters in parameter_sets:
        with TemporaryDirectory() as tempdir:
            outvcf = tempdir + '/output-recombination_breaks.vcf'
            outlist = tempdir + '/output.recomb'
            run_whatshap(
                phase_input_files=[recombination_breaks_bamfile],
                variant_file='tests/data/quartet.vcf.gz',
                output=outvcf,
                ped='tests/data/recombination_breaks.ped',
                recombination_list_filename=outlist,
                **parameters,
            )
            assert os.path.isfile(outvcf)

            tables = list(VcfReader(outvcf, phases=True))
            assert len(tables) == 1
            table = tables[0]
            assert table.chromosome == '1'
            assert len(table.variants) == 4
            assert table.samples == ['HG002', 'HG005', 'HG003', 'HG004']
            assert table.num_of_blocks_of('HG002') == 0
            assert table.num_of_blocks_of('HG005') == 0
            assert table.num_of_blocks_of('HG003') == 1
            assert table.num_of_blocks_of('HG004') == 0

            phase0 = VariantCallPhase(68735304, 0, None)
            phase1 = VariantCallPhase(68735304, 1, None)
            assert_phasing(table.phases_of('HG002'), [None, None, None, None])
            assert_phasing(table.phases_of('HG005'), [None, None, None, None])
            if expect_recombination:
                assert_phasing(table.phases_of('HG003'), [phase0, phase0, None, phase1])
            else:
                assert_phasing(table.phases_of('HG003'), [phase0, phase0, None, phase0])
            assert_phasing(table.phases_of('HG004'), [None, None, None, None])

            # Close the file explicitly: an open handle inside the temporary
            # directory can also get in the way of its cleanup on some
            # platforms.
            with open(outlist) as recomb_file:
                lines = recomb_file.readlines()
            if expect_recombination:
                assert len(lines) == 3
                assert lines[1] == 'HG002 1 68735433 68738308 0 0 0 1 3\n'
                assert lines[2] == 'HG005 1 68735433 68738308 0 0 0 1 3\n'
            else:
                assert len(lines) == 1
def test_phase_specific_chromosome(chromosome, tmp_path):
    """Restrict phasing to ``chromosome`` and check both output tables.

    The output must still contain two tables. A table is compared against a
    non-trivial phasing only when it belongs to the requested chromosome and
    that chromosome is "1" or "2"; every other table must be unphased.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-two-chromosomes.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        chromosomes=[chromosome],
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 2
    for table in tables:
        assert len(table.variants) == 5
        assert table.samples == ["HG004", "HG003", "HG002"]

        on_requested = table.chromosome == chromosome
        if on_requested and chromosome == "1":
            hap01 = VariantCallPhase(60906167, (0, 1), None)
            expected = {
                "HG004": [hap01, hap01, hap01, hap01, hap01],
                "HG003": [hap01, None, hap01, hap01, hap01],
                "HG002": [None, hap01, None, None, None],
            }
        elif on_requested and chromosome == "2":
            hap01 = VariantCallPhase(60906167, (0, 1), None)
            hap10 = VariantCallPhase(60906167, (1, 0), None)
            expected = {
                "HG004": [hap01, None, None, None, hap10],
                "HG003": [hap01, None, None, None, None],
                "HG002": [None, None, None, None, hap01],
            }
        else:
            expected = {name: [None] * 5 for name in ("HG004", "HG003", "HG002")}

        for name, phasing in expected.items():
            assert_phasing(table.phases_of(name), phasing)
def test_genetic_haplotyping(tmp_path):
    """Phase from the pedigree alone (no read input files) and check the result.

    Verifies the phasing of all five samples and the single event written to
    the recombination list.
    """
    outvcf = tmp_path / "output.vcf"
    # The original file name was misspelled ("utput.recomb"); the path is only
    # used inside this test, so the correction has no outside effect.
    outrecomb = tmp_path / "output.recomb"
    run_whatshap(
        variant_file="tests/data/genetic-haplotyping.vcf",
        phase_input_files=[],
        ped="tests/data/genetic-haplotyping.ped",
        output=outvcf,
        recombination_list_filename=outrecomb,
    )

    tables = list(VcfReader(outvcf, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 3
    assert table.samples == ["sampleA", "sampleB", "sampleC", "sampleD", "sampleE"]
    assert table.num_of_blocks_of("sampleA") == 1
    assert table.num_of_blocks_of("sampleB") == 1
    assert table.num_of_blocks_of("sampleC") == 0
    assert table.num_of_blocks_of("sampleD") == 1
    assert table.num_of_blocks_of("sampleE") == 1

    phase0 = VariantCallPhase(10327, (0, 1), None)
    phase1 = VariantCallPhase(10327, (1, 0), None)
    assert_phasing(table.phases_of("sampleA"), [phase0, phase0, phase1])
    assert_phasing(table.phases_of("sampleB"), [phase0, None, None])
    assert_phasing(table.phases_of("sampleC"), [None, None, None])
    assert_phasing(table.phases_of("sampleD"), [phase0, None, phase1])
    assert_phasing(table.phases_of("sampleE"), [phase0, phase0, None])

    # Context manager closes the handle that the bare open() used to leak.
    with open(outrecomb) as recomb_file:
        lines = [line.split() for line in recomb_file]
    assert len(lines) == 2
    # First line is the header; strip the leading "#" to get field names.
    Fields = namedtuple("Fields", [f.strip("#\n") for f in lines[0]])
    recomb = Fields(*lines[1])
    print(recomb)
    assert recomb.child_id == "sampleC"
    assert recomb.chromosome == "1"
    assert recomb.position1 == "31295"
    assert recomb.position2 == "102596"
def test_indel_phasing(algorithm):
    """Phase the indel test data with ``indels=True`` and check the single sample."""
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output.vcf'
        run_whatshap(
            phase_input_files=[indels_bamfile],
            indels=True,
            variant_file='tests/data/indels.vcf',
            reference='tests/data/random0.fasta',
            output=vcf_path,
            algorithm=algorithm,
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, indels=True, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == 'random0'
        assert len(table.variants) == 4
        assert table.samples == ['sample1']

        hap0 = VariantCallPhase(41, 0, None)
        hap1 = VariantCallPhase(41, 1, None)
        # The expected phase alternates between the two haplotypes.
        assert_phasing(table.phases_of('sample1'), [hap0, hap1] * 2)
def test_read_phased_vcf():
    """Read two phased VCFs (HP- and PS-tagged) and check identical contents.

    Both files must yield the same two tables with the same genotypes and
    phases, whether accessed by sample index or by sample name.
    """
    # Expected genotypes in canonic-index form, keyed by sample in table order.
    genotypes_a = {"sample1": [1, 2, 1, 1], "sample2": [1, 1, 1, 1]}
    genotypes_b = {"sample1": [0, 1], "sample2": [1, 2]}

    for filename in ["tests/data/phased-via-HP.vcf", "tests/data/phased-via-PS.vcf"]:
        print("Testing", filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        assert table_a.chromosome == "chrA"
        assert len(table_a.variants) == 4
        assert table_a.samples == ["sample1", "sample2"]

        assert table_b.chromosome == "chrB"
        assert len(table_b.variants) == 2
        assert table_b.samples == ["sample1", "sample2"]

        for table, by_sample in ((table_a, genotypes_a), (table_b, genotypes_b)):
            assert len(table.genotypes) == 2
            for index, canonic in enumerate(by_sample.values()):
                assert list(table.genotypes[index]) == (
                    canonic_index_list_to_biallelic_gt_list(canonic)
                )
            for sample, canonic in by_sample.items():
                assert list(table.genotypes_of(sample)) == (
                    canonic_index_list_to_biallelic_gt_list(canonic)
                )

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phase_sample1 = [
            None,
            None,
            VariantCallPhase(block_id=300, phase=(1, 0), quality=23),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=42),
        ]
        expected_phase_sample2 = [
            VariantCallPhase(block_id=100, phase=(0, 1), quality=10),
            VariantCallPhase(block_id=100, phase=(1, 0), quality=20),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=30),
            VariantCallPhase(block_id=300, phase=(0, 1), quality=None),
        ]
        phases_a = (expected_phase_sample1, expected_phase_sample2)
        for index, phasing in enumerate(phases_a):
            assert list(table_a.phases[index]) == phasing
        for sample, phasing in zip(("sample1", "sample2"), phases_a):
            assert list(table_a.phases_of(sample)) == phasing

        # Table B carries no phase information at all.
        assert len(table_b.phases) == 2
        for index in (0, 1):
            assert list(table_b.phases[index]) == [None, None]
        for sample in ("sample1", "sample2"):
            assert list(table_b.phases_of(sample)) == [None, None]
def test_phase_trio_merged_blocks():
    """Phase the merged-blocks trio and expect a single block per sample."""
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output-merged-blocks.vcf'
        run_whatshap(
            phase_input_files=[trio_merged_bamfile],
            variant_file='tests/data/trio-merged-blocks.vcf',
            output=vcf_path,
            ped='tests/data/trio.ped',
            genmap='tests/data/trio.map',
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 8
        assert table.samples == ['HG002', 'HG003', 'HG004']

        for sample in ('HG004', 'HG003', 'HG002'):
            assert table.num_of_blocks_of(sample) == 1

        hap0 = VariantCallPhase(752566, 0, None)
        hap1 = VariantCallPhase(752566, 1, None)
        expected = {
            'HG004': [hap1] * 3 + [None] + [hap1] * 4,
            'HG003': [None] * 4 + [hap0] * 3 + [hap1],
            'HG002': [None] * 7 + [hap1],
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_genetic_haplotyping():
    """Phase from the pedigree alone (no read input files) and check the result.

    Verifies the phasing of all five samples and the single event written to
    the recombination list.
    """
    with TemporaryDirectory() as tempdir:
        outvcf = tempdir + '/output.vcf'
        outrecomb = tempdir + '/output.recomb'
        run_whatshap(
            variant_file='tests/data/genetic-haplotyping.vcf',
            phase_input_files=[],
            ped='tests/data/genetic-haplotyping.ped',
            output=outvcf,
            recombination_list_filename=outrecomb,
        )

        tables = list(VcfReader(outvcf, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 3
        assert table.samples == ['sampleA', 'sampleB', 'sampleC', 'sampleD', 'sampleE']
        assert table.num_of_blocks_of('sampleA') == 1
        assert table.num_of_blocks_of('sampleB') == 1
        assert table.num_of_blocks_of('sampleC') == 0
        assert table.num_of_blocks_of('sampleD') == 1
        assert table.num_of_blocks_of('sampleE') == 1

        phase0 = VariantCallPhase(10327, 0, None)
        phase1 = VariantCallPhase(10327, 1, None)
        assert_phasing(table.phases_of('sampleA'), [phase0, phase0, phase1])
        assert_phasing(table.phases_of('sampleB'), [phase0, None, None])
        assert_phasing(table.phases_of('sampleC'), [None, None, None])
        assert_phasing(table.phases_of('sampleD'), [phase0, None, phase1])
        assert_phasing(table.phases_of('sampleE'), [phase0, phase0, None])

        # Close the handle before the temporary directory is removed; the
        # bare open() used previously left it to the garbage collector.
        with open(outrecomb) as recomb_file:
            lines = [line.split() for line in recomb_file]
        assert len(lines) == 2
        # First line is the header; strip the leading '#' to get field names.
        Fields = namedtuple('Fields', [f.strip('#\n') for f in lines[0]])
        recomb = Fields(*lines[1])
        print(recomb)
        assert recomb.child_id == 'sampleC'
        assert recomb.chromosome == '1'
        assert recomb.position1 == '31295'
        assert recomb.position2 == '102596'
def test_indel_phasing(algorithm, tmp_path):
    """Phase the indel test data with ``indels=True`` and check the single sample."""
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[indels_bamfile],
        indels=True,
        variant_file="tests/data/indels.vcf",
        reference="tests/data/random0.fasta",
        output=vcf_path,
        algorithm=algorithm,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, indels=True, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "random0"
    assert len(table.variants) == 4
    assert table.samples == ["sample1"]

    hap01 = VariantCallPhase(41, (0, 1), None)
    hap10 = VariantCallPhase(41, (1, 0), None)
    # The expected phase alternates between the two orientations.
    assert_phasing(table.phases_of("sample1"), [hap01, hap10] * 2)
def test_phase_trio_paired_end_reads():
    """Phase the paired-end trio data and verify block counts and phasing."""
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output-paired_end.vcf'
        run_whatshap(
            phase_input_files=[trio_paired_end_bamfile],
            variant_file='tests/data/paired_end.sorted.vcf',
            output=vcf_path,
            ped='tests/data/trio_paired_end.ped',
            genmap='tests/data/trio.map',
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 3
        assert table.samples == ['mother', 'father', 'child']

        for sample, block_count in (('mother', 1), ('father', 0), ('child', 1)):
            assert table.num_of_blocks_of(sample) == block_count

        hap0 = VariantCallPhase(80050, 0, None)
        hap1 = VariantCallPhase(80050, 1, None)
        expected = {
            'mother': [hap1, hap1, hap0],
            'father': [None] * 3,
            'child': [None, None, hap1],
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_phase_trio_use_ped_samples(ped_samples, tmpdir):
    """Phase the ped_samples data with ``use_ped_samples`` set to ``ped_samples``.

    The three pedigree samples are expected to be phased the same way for
    either value. The additional sample ``orphan`` is expected to stay
    unphased when ``ped_samples`` is true and to be phased otherwise.
    """
    vcf_path = str(tmpdir.join("output_ped_samples.vcf"))
    read_list_path = str(tmpdir.join("readlist.tsv"))
    run_whatshap(
        phase_input_files=[ped_samples_bamfile],
        variant_file="tests/data/ped_samples.vcf",
        read_list_filename=read_list_path,
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
        use_ped_samples=ped_samples,
    )
    for written in (vcf_path, read_list_path):
        assert os.path.isfile(written)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002", "orphan"]

    trio_block = VariantCallPhase(60906167, (0, 1), None)
    orphan_block = VariantCallPhase(60907394, (0, 1), None)
    expected = {
        "HG004": [trio_block] * 5,
        "HG003": [trio_block, None, trio_block, trio_block, trio_block],
        "HG002": [None, trio_block, None, None, None],
        "orphan": (
            [None] * 5
            if ped_samples
            else [None, orphan_block, orphan_block, orphan_block, None]
        ),
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_distrust_genotypes_assertion(tmp_path):
    """Phase the test_dist_geno data with ``indels=False`` and check NA12878."""
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        indels=False,
        phase_input_files=[dist_geno_bamfile],
        variant_file="tests/data/test_dist_geno.vcf",
        output=vcf_path,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "chr1"

    hap = VariantCallPhase(23824647, (0, 1), None)
    # Only every second variant is expected to be phased.
    assert_phasing(table.phases_of("NA12878"), [None, hap] * 2)
def test_phase_one_of_three_individuals():
    """Phase only HG003 via ``samples`` and expect the other samples unphased."""
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output.vcf'
        run_whatshap(
            phase_input_files=[trio_bamfile],
            variant_file='tests/data/trio.vcf',
            output=vcf_path,
            samples=['HG003'],
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 5
        assert table.samples == ['HG004', 'HG003', 'HG002']

        hap = VariantCallPhase(60906167, 0, None)
        expected = {
            'HG004': [None] * 5,
            'HG003': [hap, None, hap, None, None],
            'HG002': [None] * 5,
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_read_phased_vcf():
    """Read two phased VCFs (HP- and PS-tagged) and check identical contents.

    Both files must yield the same two tables with the same genotypes and
    phases, whether accessed by sample index or by sample name.
    """
    sample_names = ('sample1', 'sample2')
    for filename in ['tests/data/phased-via-HP.vcf', 'tests/data/phased-via-PS.vcf']:
        print('Testing', filename)
        tables = list(VcfReader(filename, phases=True))
        assert len(tables) == 2
        table_a, table_b = tables

        assert table_a.chromosome == 'chrA'
        assert len(table_a.variants) == 4
        assert table_a.samples == ['sample1', 'sample2']

        assert table_b.chromosome == 'chrB'
        assert len(table_b.variants) == 2
        assert table_b.samples == ['sample1', 'sample2']

        # Expected genotypes per table, listed in sample order.
        genotype_checks = (
            (table_a, ([1, 2, 1, 1], [1, 1, 1, 1])),
            (table_b, ([0, 1], [1, 2])),
        )
        for table, per_sample in genotype_checks:
            assert len(table.genotypes) == 2
            for index, genotypes in enumerate(per_sample):
                assert list(table.genotypes[index]) == genotypes
            for sample, genotypes in zip(sample_names, per_sample):
                assert list(table.genotypes_of(sample)) == genotypes

        print(table_a.phases)
        assert len(table_a.phases) == 2
        expected_phase_sample1 = [
            None,
            None,
            VariantCallPhase(block_id=300, phase=1, quality=23),
            VariantCallPhase(block_id=300, phase=0, quality=42),
        ]
        expected_phase_sample2 = [
            VariantCallPhase(block_id=100, phase=0, quality=10),
            VariantCallPhase(block_id=100, phase=1, quality=20),
            VariantCallPhase(block_id=300, phase=0, quality=30),
            VariantCallPhase(block_id=300, phase=0, quality=None),
        ]
        phases_a = (expected_phase_sample1, expected_phase_sample2)
        for index, phasing in enumerate(phases_a):
            assert list(table_a.phases[index]) == phasing
        for sample, phasing in zip(sample_names, phases_a):
            assert list(table_a.phases_of(sample)) == phasing

        # Table B carries no phase information at all.
        assert len(table_b.phases) == 2
        for index in (0, 1):
            assert list(table_b.phases[index]) == [None, None]
        for sample in sample_names:
            assert list(table_b.phases_of(sample)) == [None, None]
def test_genetic_phasing_symbolic_alt():
    """Phase the trio-symbolic-alt VCF from the pedigree alone, with indels enabled.

    No read input files are given; the output is re-read with ``indels=True``
    and the phasing of all three samples is checked.
    """
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output.vcf'
        run_whatshap(
            phase_input_files=[],
            variant_file='tests/data/trio-symbolic-alt.vcf',
            output=vcf_path,
            ped='tests/data/trio.ped',
            indels=True,
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, phases=True, indels=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 5
        assert table.samples == ['HG004', 'HG003', 'HG002']

        hap = VariantCallPhase(60906167, 0, None)
        expected = {
            'HG004': [hap] * 5,
            'HG003': [hap, None, hap, hap, hap],
            'HG002': [None, hap, None, None, None],
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_phase_mendelian_conflict():
    """Phase the trio-mendelian-conflict VCF and check the resulting phasing.

    The second variant is expected to be unphased in every sample, and HG002
    is expected to be unphased throughout.
    """
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output.vcf'
        run_whatshap(
            phase_input_files=[trio_bamfile],
            variant_file='tests/data/trio-mendelian-conflict.vcf',
            output=vcf_path,
            ped='tests/data/trio.ped',
            genmap='tests/data/trio.map',
        )
        assert os.path.isfile(vcf_path)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 5
        assert table.samples == ['HG004', 'HG003', 'HG002']

        hap = VariantCallPhase(60906167, 0, None)
        expected = {
            'HG004': [hap, None, hap, hap, hap],
            'HG003': [hap, None, hap, hap, hap],
            'HG002': [None] * 5,
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_phase_trio_distrust_genotypes():
    """Phase the genotype-likelihood trio VCF with ``distrust_genotypes=True``.

    Both the output VCF and the read list must be written, and the phasing of
    all three samples must match the expected result.
    """
    with TemporaryDirectory() as workdir:
        vcf_path = f'{workdir}/output_gl.vcf'
        read_list_path = f'{workdir}/readlist.tsv'
        run_whatshap(
            phase_input_files=[trio_bamfile],
            variant_file='tests/data/trio_genotype_likelihoods.vcf',
            read_list_filename=read_list_path,
            output=vcf_path,
            ped='tests/data/trio.ped',
            genmap='tests/data/trio.map',
            distrust_genotypes=True,
        )
        for written in (vcf_path, read_list_path):
            assert os.path.isfile(written)

        tables = list(VcfReader(vcf_path, phases=True))
        assert len(tables) == 1
        table = tables[0]
        assert table.chromosome == '1'
        assert len(table.variants) == 5
        assert table.samples == ['HG004', 'HG003', 'HG002']

        hap = VariantCallPhase(60906167, 0, None)
        expected = {
            'HG004': [None, hap, hap, hap, None],
            'HG003': [hap, None, hap, hap, hap],
            'HG002': [hap, None, hap, hap, hap],
        }
        for sample, phasing in expected.items():
            assert_phasing(table.phases_of(sample), phasing)
def test_phase_missing_genotypes(tmp_path):
    """Phase the trio-missing-genotypes VCF and check the resulting phasing.

    The third and fifth variants are expected to be unphased in every sample.
    """
    vcf_path = tmp_path / "output.vcf"
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-missing-genotypes.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    hap = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [hap, hap, None, hap, None],
        "HG003": [hap, None, None, hap, None],
        "HG002": [None, hap, None, None, None],
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_phase_mendelian_conflict(tmpdir):
    """Phase the trio-mendelian-conflict VCF and check the resulting phasing.

    The second variant is expected to be unphased in every sample, and HG002
    is expected to be unphased throughout.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    run_whatshap(
        phase_input_files=[trio_bamfile],
        variant_file="tests/data/trio-mendelian-conflict.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        genmap="tests/data/trio.map",
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    hap = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [hap, None, hap, hap, hap],
        "HG003": [hap, None, hap, hap, hap],
        "HG002": [None] * 5,
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)
def test_genetic_phasing_symbolic_alt(tmpdir):
    """Phase the trio-symbolic-alt VCF from the pedigree alone, with indels enabled.

    No read input files are given; the output is re-read with ``indels=True``
    and the phasing of all three samples is checked.
    """
    vcf_path = str(tmpdir.join("output.vcf"))
    run_whatshap(
        phase_input_files=[],
        variant_file="tests/data/trio-symbolic-alt.vcf",
        output=vcf_path,
        ped="tests/data/trio.ped",
        indels=True,
    )
    assert os.path.isfile(vcf_path)

    tables = list(VcfReader(vcf_path, phases=True, indels=True))
    assert len(tables) == 1
    table = tables[0]
    assert table.chromosome == "1"
    assert len(table.variants) == 5
    assert table.samples == ["HG004", "HG003", "HG002"]

    hap = VariantCallPhase(60906167, (0, 1), None)
    expected = {
        "HG004": [hap] * 5,
        "HG003": [hap, None, hap, hap, hap],
        "HG002": [None, hap, None, None, None],
    }
    for sample, phasing in expected.items():
        assert_phasing(table.phases_of(sample), phasing)