def test_phasing_for_two_heterozygous_variants_ocrn_same_strand(self):
    """Two het SNPs carried by the same reads are reported in one phase set.

    Twenty reads carry both alternate bases and twenty are pure reference.
    With MNP calls disabled, two records are expected; the pair is asserted
    to have phased genotypes ``0|1`` / ``0|1`` and phase set id ``"8"``.
    """
    contig = "1"
    sample = "a_sample"

    caller = (
        SVCDriver(self)
        .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=contig)
        .with_read(
            "........T...T........",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_read(
            ".....................",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
    )

    vcf = caller.call().with_output_vcf()
    two_records = vcf.record_count(2)
    snp_pair = two_records.has_record_for_variants(
        Variant(contig, 8, "G", "T"),
        Variant(contig, 12, "A", "T")
    )
    phased = snp_pair.with_sample(sample).has_phased_genotypes("0|1", "0|1")
    phased.has_phase_set_id("8")
def test_doesnt_give_a_flying_damn_about_spurious_filter_header(self):
    """A candidate VCF with a punctuation-heavy FILTER name is accepted.

    The filter is declared on the input schema and attached to the candidate
    record. The call is expected to succeed and the SNP to be genotyped
    ``1/1`` for the sample.
    """
    contig = "22"
    sample = "bobs_your_uncle"
    odd_filter = '.+-*\\/~@?!%^&><=\"\'(){}[]_|'
    snp = Variant(contig, 11, "A", "C")

    # Declare the unusual filter in the header, then use it on the record.
    header = Schema()
    header.set_filter(odd_filter, 'unusual characters')
    candidates = VCFBuilder(
        join(self.work_dir, "genotype.vcf"), schema=header)
    candidates.with_record_from_variant(snp, filters={odd_filter})
    candidates.build().index()

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=contig, sample_name=sample)
        .with_genotype_alleles(candidates.compressed_filename)
    )

    outcome = caller.call(expected_success=True)
    record = outcome.with_output_vcf().has_record_for_variant(snp)
    record.with_sample(sample).has_genotype("1/1")
def test_should_handle_complex_variant_input(self):
    """A candidate variant that can be trimmed produces a trim warning.

    The genotyping VCF supplies ``CAA>CA`` at position 10. The log is
    expected to hold an input-variant-trimmed warning pairing that variant
    with ``Variant(chrom, 11, "A", "")``, and the output VCF one record.
    """
    contig = "22"
    sample = "bobs_your_uncle"
    untrimmed = Variant(contig, 10, "CAA", "CA")

    candidates = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    candidates.with_record_from_variant(untrimmed)
    candidates.build().index()

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=contig, sample_name=sample)
        .with_genotype_alleles(candidates.compressed_filename)
    )
    outcome = caller.call()

    log = outcome.with_log()
    log.input_variant_trimmed_warning(
        untrimmed, Variant(contig, 11, "A", ""))

    output_vcf = outcome.with_output_vcf()
    output_vcf.record_count(1)
def test_should_not_apply_filter_to_snp_if_all_supporting_reads_are_good(
        self):
    """The SNP at position 14 must come out unfiltered with BR enabled.

    The caller runs with the ``BR`` variant filter, a bad-reads window of 7
    and a minimum bad-reads score of 15. One read group carries the SNP
    with no explicit quality string; a second group carries bases at the
    ``*`` reference positions and is given an explicit quality string.
    Three records are expected overall.
    """
    caller = (
        SVCDriver(self)
        .with_var_filters("BR")
        .with_bad_reads_window_size(7)
        .with_min_bad_reads_score(15)
    )

    # Position ruler kept from the original test: 1234567 890123456789
    with_reference = caller.with_ref_sequence(
        "AAAGCGTAA**CCGGGTTAGT**CAAACCCGTTACGTATGCATG")
    with_snp_reads = with_reference.with_read(
        ".........**.....G....**.....................",
        n_rev=10, n_fwd=10)
    with_snp_reads.with_read(
        ".........GT..........TA.....................",
        " 00 00 ",
        n_rev=11, n_fwd=10)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.record_count(3)

    snp_record = output_vcf.has_record_for_variant(
        Variant(DEFAULT_CHROM, 14, "T", "G"))
    snp_record.with_no_filters()
def test_calls_deletion_and_snp_at_same_location_in_repeat_region_with_few_reads_as_anchors(
        self):
    """A deletion and an overlapping SNP are both called and phased apart.

    Five reverse-strand reads carry a two-base deletion and five
    reverse-strand reads carry a SNP at position 21; there are no forward
    reads. The pair of records is asserted to have phased genotypes
    ``.|1`` and ``1|.`` for the sample.
    """
    contig = "1"
    sample_id = "sample"

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        'CGAGAGAGAGAGAGAGAGAGATAGAGAGAGAGAGAGAGAGTC', chrom=contig)
    with_deletion_reads = with_reference.with_read(
        '....................**....................',
        n_rev=5, n_fwd=0, chrom=contig, sample_name=sample_id)
    with_deletion_reads.with_read(
        '.....................G....................',
        n_rev=5, n_fwd=0, chrom=contig, sample_name=sample_id)

    output_vcf = caller.call().with_output_vcf()
    overlapping = output_vcf.has_record_for_variants(
        Variant(contig, 21, "T", "G"),
        Variant(contig, 19, "GAT", "G")
    )
    overlapping.with_sample(sample_id).has_phased_genotypes(".|1", "1|.")
def test_phase_alignment_for_het_variants_for_three_clusters_when_first_cluster_is_homozygous(
        self):
    """Het SNPs in later clusters share a phase set after a hom first SNP.

    All reads carry the SNP at position 4; half of them also carry the SNPs
    at 18 and 34. With max and min cluster distance both set to 5, three
    records are expected: the first ``1|1`` in phase set ``"4"``, the other
    two ``1|0`` in phase set ``"13"``.
    """
    contig = "1"
    sample = "sample1"

    caller = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=contig)
        .with_read(
            "....A....................................",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_read(
            "....A.............C...............T......",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
        .with_max_cluster_distance(5)
        .with_min_cluster_distance(5)
    )
    vcf = caller.call().with_output_vcf().record_count(3)

    # (position, ref, alt, exact phased genotype, phase set id)
    expected_calls = (
        (4, "C", "A", "1|1", "4"),
        (18, "T", "C", "1|0", "13"),
        (34, "A", "T", "1|0", "13"),
    )
    for position, ref, alt, genotype, phase_set in expected_calls:
        record = vcf.has_record_for_variants(
            Variant(contig, position, ref, alt))
        per_sample = record.with_sample(sample)
        per_sample.has_exact_phased_genotypes(genotype) \
            .has_phase_set_id(phase_set)
def test_phase_not_aligns_for_hom_snp_in_first_cluster(self):
    """A hom SNP in the first cluster does not join the later phase set.

    Every read carries the SNP at position 7 and half of them also carry
    the SNP at 29. With a max cluster distance of 10, two records are
    expected: ``1|1`` in phase set ``"7"`` and ``1|0`` in phase set
    ``"28"``.
    """
    contig = "1"
    sample = "a_sample"

    caller = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=contig)
        .with_read(
            ".......A.....................A...........",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_read(
            ".......A.................................",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_output_phased_genotypes(True)
        .with_allow_MNP_calls(False)
        .with_max_cluster_distance(10)
    )
    vcf = caller.call().with_output_vcf().record_count(2)

    hom_snp = vcf.has_record_for_variants(Variant(contig, 7, "C", "A"))
    hom_sample = hom_snp.with_sample(sample)
    hom_sample.has_exact_phased_genotypes("1|1").has_phase_set_id("7")

    het_snp = vcf.has_record_for_variants(Variant(contig, 29, "G", "A"))
    het_sample = het_snp.with_sample(sample)
    het_sample.has_exact_phased_genotypes("1|0").has_phase_set_id("28")
def test_gets_correct_genotype_if_not_fully_left_aligned(self):
    """A candidate deletion given at position 12 is genotyped ``1/1``.

    The candidate ``AA>A`` comes from the genotyping VCF, while all twenty
    reads mark a single deleted base further along the A run. Exactly one
    output record is expected, and it must match the candidate as supplied.
    """
    contig = "22"
    sample = "bobs_your_uncle"
    deletion = Variant(contig, 12, "AA", "A")

    candidates = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    candidates.with_record_from_variant(deletion)
    candidates.build().index()

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAATACGCCCCCTACGCCCCCT",
            chrom=contig, pos_from=0)
        .with_read(
            "...................*...................",
            n_fwd=10, n_rev=10, chrom=contig, sample_name=sample)
        .with_genotype_alleles(candidates.compressed_filename)
    )

    output_vcf = caller.call().with_output_vcf()
    record = output_vcf.record_count(1).has_record_for_variant(deletion)
    record.with_sample(sample).has_genotype("1/1")
def test_genotypes_variant_correctly_complex_indel_which_is_snp_and_deletion(
        self):
    """A candidate ``CA>T`` (SNP plus deletion) is genotyped ``1/1``.

    The candidate comes from the genotyping VCF; all ten reads show a ``T``
    followed by a deleted base at the candidate location.
    """
    contig = "22"
    sample_id = "bobs_your_uncle"
    complex_indel = Variant(contig, 10, "CA", "T")

    candidates = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    candidates.with_record_from_variant(complex_indel)
    candidates.build().index()

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "ACGCCCCCTGCAAAAAAAAAAA", chrom=contig, pos_from=0)
    with_reads = with_reference.with_read(
        "..........T*..........",
        n_fwd=5, n_rev=5, chrom=contig, sample_name=sample_id)
    with_reads.with_genotype_alleles(candidates.compressed_filename)

    outcome = caller.call()
    record = outcome.with_output_vcf().has_record_for_variant(complex_indel)
    record.with_sample(sample_id).has_genotype("1/1")
def test_calls_correct_reference_between_clusters_with_uncalled_indel_between(
        self):
    """Reference calls surround a deletion and a SNP in separate clusters.

    Reference-call output is enabled with a max cluster distance of 5. A
    third read group with a single forward and a single reverse read is
    also supplied. The assertions alternate between reference-call
    intervals and the two variant records (``TA>T`` at 8, ``A>T`` at 23).
    """
    contig = "1"

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
        .with_read(
            " ..*....................... ", n_fwd=10, n_rev=10)
        .with_read(
            ".......................*............ ", n_fwd=1, n_rev=1)
        .with_read(
            ".......................T.. ", n_fwd=10, n_rev=10)
        .with_output_ref_calls(True)
        .with_max_cluster_distance(5)
    )

    output_vcf = caller.call().with_output_vcf()

    output_vcf.has_reference_calls(ChromInterval(contig, 0, 8))
    output_vcf.has_record_for_variant(Variant(contig, 8, "TA", "T"))
    output_vcf.has_reference_calls(ChromInterval(contig, 10, 23))
    output_vcf.has_record_for_variant(Variant(contig, 23, "A", "T"))
    output_vcf.has_reference_calls(ChromInterval(contig, 24, 41))
def test_dont_call_reference_between_variant_and_insertion_due_to_vcf_rep_issues(
        self):
    """Adjacent SNP and insertion records have no reference call between.

    Reference-call output is enabled. The assertions cover a reference
    region (0, 15), a SNP ``C>T`` at 15, an insertion ``G>GT`` at 16 and a
    reference region (17, 41).
    """
    contig = "1"
    read_rows = (
        " ..........T................ ",
        " ...............T........... ",
        " ...........T.*................ ",
        " ...........T.*..................... ",
        "...............T.*......... ",
    )

    caller = SVCDriver(self)
    builder = caller.with_ref_sequence(
        "AAAAAAAAAAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
    for row in read_rows:
        # Continue the fluent chain on whatever the previous call returned.
        builder = builder.with_read(row, chrom=contig)
    builder.with_output_ref_calls(True)

    output_vcf = caller.call().with_output_vcf()

    # Has only 4 records which are:-
    output_vcf.has_reference_calls_for_region(contig, 0, 15)
    output_vcf.has_record(contig, 15, "C", "T")
    output_vcf.has_record(contig, 16, "G", "GT")
    output_vcf.has_reference_calls_for_region(contig, 17, 41)
def test_calls_reference_on_location_with_low_quality_variant_support(
        self):
    """Reference calls are asserted around a deletion and a SNP.

    Reference-call output is enabled. One read group is supplied with an
    explicit quality string. The assertions alternate between
    reference-call intervals and the two variant records (``TA>T`` at 8,
    ``A>T`` at 23).
    """
    contig = "1"

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
    with_deletion = with_reference.with_read(
        " ..*....................... ", n_fwd=2, n_rev=1)
    with_qualities = with_deletion.with_read(
        ".................T.....T......... ",
        " 1 ",
        n_fwd=1, n_rev=1)
    with_snp = with_qualities.with_read(
        ".......................T.. ", n_fwd=1, n_rev=0)
    with_snp.with_output_ref_calls(True)

    outcome = caller.call()
    output_vcf = outcome.with_output_vcf()

    output_vcf.has_reference_calls(ChromInterval(contig, 0, 8))
    output_vcf.has_record_for_variant(Variant(contig, 8, "TA", "T"))
    output_vcf.has_reference_calls(ChromInterval(contig, 10, 23))
    output_vcf.has_record_for_variant(Variant(contig, 23, "A", "T"))
    output_vcf.has_reference_calls(ChromInterval(contig, 24, 41))
def test_calls_correct_ref_calls_with_cluster_of_variants(self):
    """Reference regions are asserted between three variant records.

    Reference-call output is enabled. The expected records are a deletion
    ``TA>T`` at 8, an insertion ``G>GT`` at 16 and a SNP ``A>T`` at 23,
    each separated by an asserted reference-call region.
    """
    contig = "1"
    read_rows = (
        " ..*.......*................ ",
        " .......*.......*........... ",
        " .............T......T.......... ",
        " .............T......T.............. ",
        ".................T......T.. ",
    )

    caller = SVCDriver(self)
    builder = caller.with_ref_sequence(
        "AAAAAAAATAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
    for row in read_rows:
        # Continue the fluent chain on whatever the previous call returned.
        builder = builder.with_read(row, chrom=contig)
    builder.with_output_ref_calls(True)

    output_vcf = caller.call().with_output_vcf()

    output_vcf.has_reference_calls_for_region(contig, 0, 8)
    output_vcf.has_record(contig, 8, "TA", "T")
    output_vcf.has_reference_calls_for_region(contig, 10, 16)
    output_vcf.has_record(contig, 16, "G", "GT")
    output_vcf.has_reference_calls_for_region(contig, 17, 23)
    output_vcf.has_record(contig, 23, "A", "T")
    output_vcf.has_reference_calls_for_region(contig, 24, 41)
def test_calls_ref_calls_correctly_with_mnp_that_contains_snp(self):
    """Reference regions are asserted around an MNP and a SNP inside it.

    Reference-call output and MNP calls are both enabled. The expected
    records are ``CCCC>TTTT`` at 17 and ``C>T`` at 18, with asserted
    reference regions (0, 17) and (21, 41).
    """
    contig = "1"
    read_rows = (
        " ..........TTTT............. ",
        " ...............TTTT........ ",
        " ..............T................ ",
        " ..............T.................... ",
        "..................T........ ",
    )

    caller = SVCDriver(self)
    builder = caller.with_ref_sequence(
        "AAAAAAAAAAACGCACCCCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
    for row in read_rows:
        # Continue the fluent chain on whatever the previous call returned.
        builder = builder.with_read(row, chrom=contig)
    builder.with_output_ref_calls(True).with_allow_MNP_calls(True)

    output_vcf = caller.call().with_output_vcf()

    output_vcf.has_reference_calls_for_region(contig, 0, 17)
    output_vcf.has_record(contig, 17, "CCCC", "TTTT")
    output_vcf.has_record(contig, 18, "C", "T")
    output_vcf.has_reference_calls_for_region(contig, 21, 41)
def test_depth_computation_all_reads_spanning_reference_with_insertion(
        self):
    """Depth is 10 on both reference blocks and on the insertion record.

    Ten reads (five per strand) carry a ``T`` at the ``*`` reference
    position. Reference-call output is enabled and MNP calls are disabled.
    ``ref_alt`` is defined outside this method and is used as the alt
    allele of the reference-call records.
    """
    contig = "1"
    sample = "bah"

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence(
            "AAAAAAAAAAAAAAAC*AAAAAAAAAAAAAAAAAAAAAAA", chrom=contig)
        .with_read(
            "................T.......................",
            n_rev=5, n_fwd=5, sample_name=sample)
        .with_output_ref_calls(True)
        .with_allow_MNP_calls(False)
    )

    output_vcf = caller.call().with_output_vcf()

    leading_ref = output_vcf.has_record_for_variant(
        Variant(contig, 0, "A", ref_alt))
    leading_ref.with_sample(sample) \
        .has_read_depth(10).has_min_read_depth(10)

    insertion = output_vcf.has_record_for_variant(
        Variant(contig, 15, "C", "CT"))
    insertion.with_sample(sample).has_read_depth(10)

    trailing_ref = output_vcf.has_record_for_variant(
        Variant(contig, 16, "A", ref_alt))
    trailing_ref.with_sample(sample) \
        .has_read_depth(10).has_min_read_depth(10)
def test_genotypes_mnp_correctly_with_supporting_reads(self):
    """A candidate MNP with partial read support is genotyped ``./1``.

    The candidate ``AAA>CAC`` comes from the genotyping VCF. Ten reads
    carry both substituted bases and ten carry only the first one.
    """
    contig = "22"
    sample = "bobs_your_uncle"
    mnp = Variant(contig, 11, "AAA", "CAC")

    candidates = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    candidates.with_record_from_variant(mnp)
    candidates.build().index()

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence("ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
        .with_read(
            "...........C.C.......",
            n_fwd=5, n_rev=5, chrom=contig, sample_name=sample)
        .with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=contig, sample_name=sample)
        .with_genotype_alleles(candidates.compressed_filename)
    )

    outcome = caller.call()
    record = outcome.with_output_vcf().has_record_for_variant(mnp)
    record.with_sample(sample).has_genotype("./1")
def test_phasing_for_isolated_snp_on_one_sample_only(self):
    """Both samples get phase information for a SNP seen in only one.

    ``sample_1`` has the SNP in all of its reads and ``sample_2`` is pure
    reference. One record is expected; the samples are asserted to be
    ``1|1`` and ``0|0`` respectively, each with phase set id ``"8"`` and a
    phase set quality of ``MAX_PHRED``.
    """
    contig = "1"
    carrier = "sample_1"
    non_carrier = "sample_2"

    caller = (
        SVCDriver(self)
        .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=contig)
        .with_read(
            "........T............",
            n_fwd=10, n_rev=10, sample_name=carrier)
        .with_read(
            ".....................",
            n_fwd=10, n_rev=10, sample_name=non_carrier)
    )
    caller.with_output_phased_genotypes(True)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.record_count(1)
    snp_record = output_vcf.has_record_for_variant(
        Variant(contig, 8, "G", "T"))

    carrier_expect = snp_record.with_sample(carrier)
    carrier_expect.has_phased_genotype("1|1")
    carrier_expect.has_phase_set_id("8")
    carrier_expect.has_phase_set_quality(MAX_PHRED)

    non_carrier_expect = snp_record.with_sample(non_carrier)
    non_carrier_expect.has_phased_genotype("0|0")
    non_carrier_expect.has_phase_set_id("8")
    non_carrier_expect.has_phase_set_quality(MAX_PHRED)
def test_should_report_unkown_value_for_allele_frequence_when_depth_is_zero(
        self):
    """A sample with no reads reports depth 0 and an unknown frequency.

    The only way to output depth zero for a sample and variant is to have
    another, good sample supporting the call. The good sample therefore
    carries the SNP in twenty reads, while the empty sample has none. One
    record is expected, and the empty sample's variant allelic frequency
    is asserted to be ``None``.
    """
    contig = "1"
    supported = "good_sample"
    unsupported = "empty_sample"

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "AAACGTAGCTGTGCACCCCCAAA", chrom=contig)
    with_good_reads = with_reference.with_read(
        "..........T............",
        n_fwd=10, n_rev=10, sample_name=supported)
    with_good_reads.with_read(
        ".......................",
        n_fwd=0, n_rev=0, sample_name=unsupported)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.with_samples([supported, unsupported]).record_count(1)

    snp_record = output_vcf.has_record_for_variant(
        Variant(contig, 10, "G", "T"))
    empty_expect = snp_record.with_sample(unsupported)
    empty_expect.has_read_depth(0).has_variant_allelic_frequency(None)
def test_phase_alignment_for_two_snps_in_different_clusters_on_different_strands(
        self):
    """Two het SNPs on opposite haplotypes share a phase set.

    Half of the reads carry only the SNP at position 7 and the other half
    carry only the SNP at 29. With a max cluster distance of 10, two
    records are expected: ``1|0`` and ``0|1``, both with phase set id
    ``"7"``.
    """
    sample_name = "a_sample"
    chrom = "1"

    # Parenthesised continuation replaces a chain that ended in a dangling
    # backslash, which was only valid because a blank line followed it.
    svc_driver = (
        SVCDriver(self)
        .with_ref_sequence(
            "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
        .with_read(
            ".......A.................................",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_read(
            ".............................A...........",
            n_fwd=10, n_rev=10, sample_name=sample_name)
        .with_output_phased_genotypes(True)
        .with_max_cluster_distance(10)
    )

    vcf_expect = (
        svc_driver.call()
        .with_output_vcf()
        .record_count(2)
    )

    (
        vcf_expect.has_record_for_variants(Variant(chrom, 7, "C", "A"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("1|0")
        .has_phase_set_id("7")
    )
    (
        vcf_expect.has_record_for_variants(Variant(chrom, 29, "G", "A"))
        .with_sample(sample_name)
        .has_exact_phased_genotypes("0|1")
        .has_phase_set_id("7")
    )
def test_should_record_the_read_support_insertion_and_snp_on_same_strand(
        self):
    """Read support is reported for the ``T>TC`` record at position 9.

    Twenty reads match the reference and twenty carry a ``C`` followed by a
    ``T`` at the ``*`` reference position. Two records are expected; the
    checked record must show depth 40, allelic read support ``(20, 20)``
    and genotype ``0/1``.
    """
    contig = "1"
    sample_id = "sample"

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "AAACGTAGCTG*GCACCCCCAAA", chrom=contig)
    with_ref_reads = with_reference.with_read(
        "...........*...........",
        n_fwd=10, n_rev=10, sample_name=sample_id)
    with_ref_reads.with_read(
        "..........CT...........",
        n_fwd=10, n_rev=10, sample_name=sample_id)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.with_samples([sample_id]).record_count(2)

    record = output_vcf.has_record_for_variant(
        Variant(contig, 9, 'T', 'TC'))
    per_sample = record.with_sample(sample_id)
    per_sample.has_read_depth(40) \
        .has_allelic_read_support(20, 20) \
        .has_genotype("0/1")
def test_calls_correct_reference_when_one_sample_has_snp_and_tother_has_indel(
        self):
    """Reference regions are asserted around a SNP and an insertion.

    Two read rows belong to ``sample_1`` and three to ``sample_2``.
    Reference-call output is enabled. The assertions cover a reference
    region (0, 15), ``C>T`` at 15, ``G>GT`` at 16 and a reference region
    (17, 41).
    """
    contig = "1"
    first_sample = "sample_1"
    second_sample = "sample_2"
    # (read row, owning sample), in the order they are added to the driver.
    read_rows = (
        (" ..........T................ ", first_sample),
        (" ...............T........... ", first_sample),
        (" ...........T.*................ ", second_sample),
        (" ...........T.*..................... ", second_sample),
        ("...............T.*......... ", second_sample),
    )

    caller = SVCDriver(self)
    builder = caller.with_ref_sequence(
        "AAAAAAAAAAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=contig)
    for row, owner in read_rows:
        # Continue the fluent chain on whatever the previous call returned.
        builder = builder.with_read(row, chrom=contig, sample_name=owner)
    builder.with_output_ref_calls(True)

    output_vcf = caller.call().with_output_vcf()

    # Has only 4 records which are:-
    output_vcf.has_reference_calls_for_region(contig, 0, 15)
    output_vcf.has_record(contig, 15, "C", "T")
    output_vcf.has_record(contig, 16, "G", "GT")
    output_vcf.has_reference_calls_for_region(contig, 17, 41)
def test_should_count_reads_that_do_not_overlap_the_calling_region(self):
    """INFO ``DP`` and ``VC`` are both ``[40]`` for the SNP at position 44.

    MNP calls are enabled and calling is restricted to a region string
    built from the chromosome and the bounds 34 and 55. Two groups of
    twenty reads are supplied.
    """
    contig = "1"
    sample_id = "seed"
    calling_region = "{}:{}-{}".format(contig, 34, 55)

    caller = SVCDriver(self).with_allow_MNP_calls(True)
    (
        caller
        .with_ref_sequence(
            "GAAAAAAAAAAACGCACCCCCAAATTTTTTTTAA***********AAAATAAAAAACGCACCCCCAAATTTTTTTTAA",
            chrom=contig)
        .with_read(
            " ..........G......................",
            n_fwd=10, n_rev=10, sample_name=sample_id)
        .with_read(
            "..................................AAAATAAAAAG ",
            n_fwd=10, n_rev=10, sample_name=sample_id)
        .with_region_string(calling_region)
    )

    output_vcf = caller.call().with_output_vcf()
    snp_record = output_vcf.has_record_for_variant(
        Variant(contig, 44, 'A', 'G'))
    info = snp_record.with_info()
    info.with_field("DP", [40]).with_field("VC", [40])
def test_bad_reads_filter_not_applied_when_median_read_is_good(self):
    """The SNP at position 16 must come out unfiltered with BR enabled.

    The caller runs with the ``BR`` variant filter, a bad-reads window of 7
    and a minimum bad-reads score of 20. Two read groups carry the same
    SNP, each with its own explicit quality string. One record is expected.
    """
    caller = (
        SVCDriver(self)
        .with_var_filters("BR")
        .with_bad_reads_window_size(7)
        .with_min_bad_reads_score(20)
    )

    # Position ruler kept from the original test: 1234567890123456789
    with_reference = caller.with_ref_sequence(
        "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
    with_first_group = with_reference.with_read(
        "................G...........................",
        " 1 1 ",
        n_rev=10, n_fwd=10)
    with_first_group.with_read(
        "................G...........................",
        " 4444444 4444444 ",
        n_rev=11, n_fwd=10)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.record_count(1)

    snp_record = output_vcf.has_record_for_variant(
        Variant(DEFAULT_CHROM, 16, "T", "G"))
    snp_record.with_no_filters()
def test_should_stop_mildly_allele_and_strand_biased_calls(self):
    """A SNP with skewed allele and strand support gets the AB+SB filter.

    Read counts are derived from a base of 10 reads per strand, shifted by
    an allele bias of 5 and a strand bias of 4. The shifts are applied in
    opposite directions for the reference and alternate read groups. One
    record is expected, carrying the filter set ``{'AB+SB'}``.
    """
    contig = 'chr1'
    base_reads, allele_shift, strand_shift = 10, 5, 4
    ref_per_strand = base_reads + allele_shift
    alt_per_strand = base_reads - allele_shift

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG", chrom=contig)
    with_ref_reads = with_reference.with_read(
        "............................................",
        n_rev=ref_per_strand + strand_shift,
        n_fwd=ref_per_strand - strand_shift,
        chrom=contig)
    with_ref_reads.with_read(
        "................G...........................",
        n_rev=alt_per_strand - strand_shift,
        n_fwd=alt_per_strand + strand_shift,
        chrom=contig)

    outcome = caller.call()
    single_record = outcome.with_output_vcf().record_count(1)
    snp_record = single_record.has_record_for_variant(
        Variant(contig, 16, 'T', 'G'))
    snp_record.with_filters({'AB+SB'})
def test_should_call_basic_snps(self):
    """Four SNPs spread over two read groups are called and genotyped.

    Variant-call normalisation is enabled. One SNP is present in both read
    groups and is expected as ``1|1``; the other three are each present in
    a single group and expected as ``1|0`` or ``0|1``.
    """
    contig = "1"
    sample = "a_sample"

    caller = (
        SVCDriver(self)
        .with_ref_sequence(
            "GCCCCAGCCTCCCAAAGTGCATTGATTTTGTTGTTGTTGTGCTTATTTGCACTCCAGCCTGGCCTCTCCTTTCTTG",
            chrom=contig)
        .with_read(
            "...............T.........A...............G..................................",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_read(
            ".........................A..........................A.......................",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_normalize_variant_calls(True)
    )

    output_vcf = caller.call().with_output_vcf()
    output_vcf.record_count(4)

    # (position, ref, alt, expected genotype)
    expected_snps = (
        (15, "A", "T", "1|0"),
        (25, "T", "A", "1|1"),
        (41, "C", "G", "1|0"),
        (52, "T", "A", "0|1"),
    )
    for position, ref, alt, genotype in expected_snps:
        record = output_vcf.has_record(contig, position, ref, alt)
        record.with_sample(sample).has_genotype(genotype)
def test_min_depth_computation_with_mixed_depth_of_reads(self):
    """Reference blocks at 0, 10 and 30 report depths 10, 16 and 6.

    Two read groups of ten and six reads are supplied and reference-call
    output is enabled. For each block the read depth and the minimum read
    depth are asserted to be equal. ``ref_alt`` is defined outside this
    method.
    """
    contig = "1"
    sample = "bah"

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=contig)
    with_first_group = with_reference.with_read(
        ".............................. ",
        n_rev=5, n_fwd=5, sample_name=sample)
    with_second_group = with_first_group.with_read(
        " ..............................",
        n_rev=3, n_fwd=3, sample_name=sample)
    with_second_group.with_output_ref_calls(True)

    outcome = caller.call()

    # (block start, expected depth); depth and minimum depth must agree.
    expected_blocks = ((0, 10), (10, 16), (30, 6))
    for start, depth in expected_blocks:
        # The VCF expectation is requested afresh for every block.
        block = outcome.with_output_vcf().has_record_for_variant(
            Variant(contig, start, "A", ref_alt))
        block.with_sample(sample) \
            .has_read_depth(depth).has_min_read_depth(depth)
def test_doesnt_give_a_flying_damn_about_spurious_filters(self):
    """A candidate record with a punctuation-heavy FILTER is accepted.

    The filter is attached to the candidate record in the genotyping VCF
    without passing a custom schema. The call is expected to succeed and
    the SNP to be genotyped ``1/1``.
    """
    contig = "22"
    sample = "bobs_your_uncle"
    snp = Variant(contig, 11, "A", "C")

    candidates = VCFBuilder(join(self.work_dir, "genotype.vcf"))
    candidates.with_record_from_variant(
        snp, filters={"#$.:@$%$%^&**()7!"})
    candidates.build().index()

    caller = SVCDriver(self)
    with_reference = caller.with_ref_sequence(
        "ACGCCCCCTGCAAAAAAAAAA", chrom=contig, pos_from=0)
    with_reads = with_reference.with_read(
        "...........C.........",
        n_fwd=5, n_rev=5, chrom=contig, sample_name=sample)
    with_reads.with_genotype_alleles(candidates.compressed_filename)

    outcome = caller.call(expected_success=True)
    record = outcome.with_output_vcf().has_record_for_variant(snp)
    record.with_sample(sample).has_genotype("1/1")
def test_min_depth_computation_with_mixed_depth_of_reads_when_no_chunking_occurs(
        self):
    """The reference block at 0 reports a minimum read depth of 20.

    Three read groups of twenty, four and two reads are supplied and
    reference-call output is enabled. The expected read depth is the
    rounded value of the arithmetic expression below. ``ref_alt`` is
    defined outside this method.
    """
    contig = "1"
    sample = "bah"
    # Expression kept verbatim so its division semantics are unchanged.
    expected_depth = round(20 + 4 * 35 / 40 + 2 * 35 / 40)

    caller = SVCDriver(self)
    (
        caller
        .with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=contig)
        .with_read(
            "........................................",
            n_rev=10, n_fwd=10, sample_name=sample)
        .with_read(
            "................................... ",
            n_rev=2, n_fwd=2, sample_name=sample)
        .with_read(
            " ................................... ",
            n_rev=1, n_fwd=1, sample_name=sample)
        .with_output_ref_calls(True)
    )

    outcome = caller.call()
    ref_block = outcome.with_output_vcf().has_record_for_variant(
        Variant(contig, 0, "A", ref_alt))
    per_sample = ref_block.with_sample(sample)
    per_sample.has_read_depth(expected_depth).has_min_read_depth(20)
def test_should_call_variants(self):
    """Triploid genotypes follow how many read groups carry each variant.

    The driver is set to ploidy 3 and given three equally sized read
    groups. Four records are expected; variants present in one, two or all
    three groups are asserted as ``0/0/1``, ``0/1/1`` and ``1/1/1``.
    """
    contig = 'chr1'
    sample = 'sample'

    caller = SVCDriver(self).with_ploidy(3)
    (
        caller
        .with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTC***AACCCGTTACGTATGCATG", chrom=contig)
        .with_read(
            "......C.........G.....ATG.......***.........",
            n_rev=10, n_fwd=10, chrom=contig, sample_name=sample)
        .with_read(
            "......C...............ATG.......***.........",
            n_rev=10, n_fwd=10, chrom=contig, sample_name=sample)
        .with_read(
            "......................ATG.......***.........",
            n_rev=10, n_fwd=10, chrom=contig, sample_name=sample)
    )

    outcome = caller.call()
    vcf = outcome.with_output_vcf().record_count(4)

    # (position, ref, alt, expected triploid genotype)
    expected_calls = (
        (6, 'T', 'C', '0/1/1'),
        (16, 'T', 'G', '0/0/1'),
        (21, 'C', 'CATG', '1/1/1'),
        (28, 'TTAC', 'T', '1/1/1'),
    )
    for position, ref, alt, genotype in expected_calls:
        record = vcf.has_record_for_variant(
            Variant(contig, position, ref, alt))
        record.with_sample(sample).has_genotype(genotype)
def test_phasing_for_isolated_heterozygous_variant(self):
    """A lone het SNP is reported ``0|1`` in its own phase set.

    Half of the reads carry the SNP at position 8 and half are pure
    reference. One record is expected, with phase set id ``"8"`` and a
    phase set quality of ``MAX_PHRED``.
    """
    contig = "1"
    sample = "a_sample"

    caller = (
        SVCDriver(self)
        .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=contig)
        .with_read(
            "........T............",
            n_fwd=10, n_rev=10, sample_name=sample)
        .with_read(
            ".....................",
            n_fwd=10, n_rev=10, sample_name=sample)
    )
    caller.with_output_phased_genotypes(True)

    output_vcf = caller.call().with_output_vcf()
    output_vcf.record_count(1)

    snp_record = output_vcf.has_record_for_variants(
        Variant(contig, 8, "G", "T"))
    per_sample = snp_record.with_sample(sample)
    per_sample.has_phased_genotypes("0|1")
    per_sample.has_phase_set_id("8")
    per_sample.has_phase_set_quality(MAX_PHRED)