def test_phasing_for_two_heterozygous_variants_ocrn_same_strand(self):
        """Two heterozygous SNPs on the same strand share one phase set.

        With phased output enabled and MNP calls disabled, the call must give
        exactly two records (G>T at 8 and A>T at 12). For the sample both are
        expected as ``0|1`` with phase set id ``8``.
        """
        chrom = "1"
        sample_name = "a_sample"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
            .with_read(
                "........T...T........",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".....................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
        )

        call_result = svc_driver.call()
        vcf_expect = call_result.with_output_vcf().record_count(2)

        first_snp = Variant(chrom, 8, "G", "T")
        second_snp = Variant(chrom, 12, "A", "T")
        records_expect = vcf_expect.has_record_for_variants(
            first_snp, second_snp)

        sample_expect = records_expect.with_sample(sample_name)
        sample_expect.has_phased_genotypes("0|1", "0|1").has_phase_set_id("8")
Exemple #2
0
    def test_doesnt_give_a_flying_damn_about_spurious_filter_header(self):
        """Genotyping succeeds when the input VCF declares an odd filter ID.

        The genotype VCF is built with a schema whose filter name is made of
        unusual characters, and the candidate record carries that filter.
        The call is expected to succeed and genotype the A>C variant at 11 as
        ``1/1`` for the sample.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 11, "A", "C")

        odd_filter_id = '.+-*\\/~@?!%^&><=\"\'(){}[]_|'
        schema = Schema()
        schema.set_filter(odd_filter_id, 'unusual characters')

        genotype_vcf_path = join(self.work_dir, "genotype.vcf")
        gv_builder = VCFBuilder(genotype_vcf_path, schema=schema)
        gv_builder.with_record_from_variant(variant, filters={odd_filter_id})
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        setup = setup.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        expect = driver.call(expected_success=True)
        sample_expect = (
            expect.with_output_vcf()
            .has_record_for_variant(variant)
            .with_sample(sample)
        )
        sample_expect.has_genotype("1/1")
Exemple #3
0
    def test_should_handle_complex_variant_input(self):
        """An untrimmed input allele is accepted and reported as trimmed.

        The candidate ``CAA>CA`` at position 10 is supplied through a
        genotype VCF. The log must contain a trimming warning mapping it to
        ``Variant(chrom, 11, "A", "")`` and the output VCF must hold exactly
        one record.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 10, "CAA", "CA")

        gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        gv_builder.with_record_from_variant(variant)
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        setup = setup.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        expect = driver.call()

        log_expect = expect.with_log()
        trimmed_variant = Variant(chrom, 11, "A", "")
        log_expect.input_variant_trimmed_warning(variant, trimmed_variant)

        vcf_expect = expect.with_output_vcf()
        vcf_expect.record_count(1)
Exemple #4
0
    def test_should_not_apply_filter_to_snp_if_all_supporting_reads_are_good(
            self):
        """The T>G SNP at 14 must come out unfiltered with the BR filter on.

        Runs with the ``BR`` variant filter (window size 7, minimum bad reads
        score 15), expects three records in total and no filters on the
        ``DEFAULT_CHROM`` T>G record at position 14.
        """
        svc = (
            SVCDriver(self)
            .with_var_filters("BR")
            .with_bad_reads_window_size(7)
            .with_min_bad_reads_score(15)
        )

        with_reference = svc.with_ref_sequence(
            # 1234567  890123456789
            "AAAGCGTAA**CCGGGTTAGT**CAAACCCGTTACGTATGCATG")
        with_snp_reads = with_reference.with_read(
            ".........**.....G....**.....................",
            n_rev=10,
            n_fwd=10,
        )
        # The second positional string is passed alongside the read pattern
        # (presumably per-base qualities -- confirm against SVCDriver.with_read).
        with_snp_reads.with_read(
            ".........GT..........TA.....................",
            "       00               00                  ",
            n_rev=11,
            n_fwd=10,
        )

        vcf_expectation = svc.call().with_output_vcf()
        vcf_expectation.record_count(3)

        snp = Variant(DEFAULT_CHROM, 14, "T", "G")
        vcf_expectation.has_record_for_variant(snp).with_no_filters()
Exemple #5
0
    def test_calls_deletion_and_snp_at_same_location_in_repeat_region_with_few_reads_as_anchors(
            self):
        """A SNP and a deletion at the same site in a repeat are both called.

        Two read patterns are added with ``n_rev=5`` and ``n_fwd=0`` each.
        The output must contain T>G at 21 and GAT>G at 19, with phased
        genotypes ``.|1`` and ``1|.`` for the sample.
        """
        chrom = "1"
        sample = "sample"

        svc_driver = SVCDriver(self)
        setup = svc_driver.with_ref_sequence(
            'CGAGAGAGAGAGAGAGAGAGATAGAGAGAGAGAGAGAGAGTC', chrom=chrom)
        setup = setup.with_read(
            '....................**....................',
            n_rev=5, n_fwd=0, chrom=chrom, sample_name=sample)
        setup.with_read(
            '.....................G....................',
            n_rev=5, n_fwd=0, chrom=chrom, sample_name=sample)

        vcf_expect = svc_driver.call().with_output_vcf()

        snp = Variant(chrom, 21, "T", "G")
        deletion = Variant(chrom, 19, "GAT", "G")
        records_expect = vcf_expect.has_record_for_variants(snp, deletion)
        records_expect.with_sample(sample).has_phased_genotypes(".|1", "1|.")
    def test_phase_alignment_for_het_variants_for_three_clusters_when_first_cluster_is_homozygous(
            self):
        """Het SNPs in later clusters align phase when the first SNP is hom.

        Min and max cluster distance are both 5, phased output is on and MNP
        calls are off. Expects three records: C>A at 4 as ``1|1`` in phase
        set ``4``, then T>C at 18 and A>T at 34 both as ``1|0`` in phase set
        ``13``.
        """
        chrom = "1"
        sample_name = "sample1"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                "....A....................................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                "....A.............C...............T......",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
            .with_max_cluster_distance(5)
            .with_min_cluster_distance(5)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(3)

        # (position, ref, alt, expected genotype, expected phase set id)
        expectations = (
            (4, "C", "A", "1|1", "4"),
            (18, "T", "C", "1|0", "13"),
            (34, "A", "T", "1|0", "13"),
        )
        for position, ref, alt, genotype, phase_set in expectations:
            record_expect = vcf_expect.has_record_for_variants(
                Variant(chrom, position, ref, alt))
            sample_expect = record_expect.with_sample(sample_name)
            sample_expect.has_exact_phased_genotypes(genotype) \
                .has_phase_set_id(phase_set)
    def test_phase_not_aligns_for_hom_snp_in_first_cluster(self):
        """A homozygous SNP in the first cluster does not join the next set.

        With max cluster distance 10, phased output on and MNP calls off,
        expects two records: C>A at 7 as ``1|1`` in phase set ``7`` and G>A
        at 29 as ``1|0`` in phase set ``28``.
        """
        chrom = "1"
        sample_name = "a_sample"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                ".......A.....................A...........",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".......A.................................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_output_phased_genotypes(True)
            .with_allow_MNP_calls(False)
            .with_max_cluster_distance(10)
        )

        vcf_expect = svc_driver.call().with_output_vcf().record_count(2)

        hom_expect = vcf_expect.has_record_for_variants(
            Variant(chrom, 7, "C", "A"))
        hom_sample = hom_expect.with_sample(sample_name)
        hom_sample.has_exact_phased_genotypes("1|1").has_phase_set_id("7")

        het_expect = vcf_expect.has_record_for_variants(
            Variant(chrom, 29, "G", "A"))
        het_sample = het_expect.with_sample(sample_name)
        het_sample.has_exact_phased_genotypes("1|0").has_phase_set_id("28")
Exemple #8
0
    def test_gets_correct_genotype_if_not_fully_left_aligned(self):
        """An input deletion given as AA>A at 12 is genotyped as ``1/1``.

        The candidate comes from a genotype VCF. The output must contain
        exactly one record, for that same variant, with genotype ``1/1`` for
        the sample.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 12, "AA", "A")

        gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        gv_builder.with_record_from_variant(variant)
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAATACGCCCCCTACGCCCCCT",
            chrom=chrom,
            pos_from=0,
        )
        setup = setup.with_read(
            "...................*...................",
            n_fwd=10, n_rev=10, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        vcf_expect = driver.call().with_output_vcf().record_count(1)
        sample_expect = vcf_expect.has_record_for_variant(variant) \
            .with_sample(sample)
        sample_expect.has_genotype("1/1")
Exemple #9
0
    def test_genotypes_variant_correctly_complex_indel_which_is_snp_and_deletion(
            self):
        """A complex CA>T candidate at position 10 is genotyped as ``1/1``.

        The candidate is supplied through a genotype VCF and the output must
        contain a record for it with genotype ``1/1`` for the sample.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 10, "CA", "T")

        gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        gv_builder.with_record_from_variant(variant)
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAAA", chrom=chrom, pos_from=0)
        setup = setup.with_read(
            "..........T*..........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        vcf_expect = driver.call().with_output_vcf()
        record_expect = vcf_expect.has_record_for_variant(variant)
        record_expect.with_sample(sample).has_genotype("1/1")
Exemple #10
0
    def test_calls_correct_reference_between_clusters_with_uncalled_indel_between(
            self):
        """Reference calls fill the gaps around the two called variants.

        With reference-call output on and max cluster distance 5, expects
        reference calls over 0-8, the TA>T record at 8, reference calls over
        10-23, the A>T record at 23, and reference calls over 24-41.
        """
        chrom = "1"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        setup = setup.with_read(
            "       ..*.......................        ", n_fwd=10, n_rev=10)
        # This pattern is added with a single read per direction.
        setup = setup.with_read(
            ".......................*............     ", n_fwd=1, n_rev=1)
        setup = setup.with_read(
            ".......................T..               ", n_fwd=10, n_rev=10)
        setup.with_output_ref_calls(True).with_max_cluster_distance(5)

        vcf_expect = driver.call().with_output_vcf()

        vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
        vcf_expect.has_record_for_variant(Variant(chrom, 8, "TA", "T"))
        vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
        vcf_expect.has_record_for_variant(Variant(chrom, 23, "A", "T"))
        vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
Exemple #11
0
    def test_dont_call_reference_between_variant_and_insertion_due_to_vcf_rep_issues(
            self):
        """No reference call is expected between the SNP and the insertion.

        With reference-call output on, the assertions cover reference calls
        over 0-15, C>T at 15, G>GT at 16 and reference calls over 17-41.
        """
        chrom = "1"
        read_patterns = (
            "       ..........T................        ",
            "  ...............T...........             ",
            "    ...........T.*................        ",
            "    ...........T.*.....................   ",
            "...............T.*.........               ",
        )

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        for pattern in read_patterns:
            setup = setup.with_read(pattern, chrom=chrom)
        setup.with_output_ref_calls(True)

        call_result = driver.call()
        vcf_expect = call_result.with_output_vcf()

        # Has only 4 records which are:-
        vcf_expect.has_reference_calls_for_region(chrom, 0, 15)
        vcf_expect.has_record(chrom, 15, "C", "T")
        vcf_expect.has_record(chrom, 16, "G", "GT")
        vcf_expect.has_reference_calls_for_region(chrom, 17, 41)
Exemple #12
0
    def test_calls_reference_on_location_with_low_quality_variant_support(
            self):
        """A site with only low-quality variant support is called reference.

        With reference-call output on, expects reference calls over 0-8, the
        TA>T record at 8, reference calls over 10-23, the A>T record at 23,
        and reference calls over 24-41.
        """
        chrom = "1"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAATAACGCACGCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        setup = setup.with_read(
            "       ..*.......................        ", n_fwd=2, n_rev=1)
        # The second positional string is passed alongside the read pattern
        # (presumably per-base qualities -- confirm against SVCDriver.with_read).
        setup = setup.with_read(
            ".................T.....T.........        ",
            "                 1                       ",
            n_fwd=1,
            n_rev=1,
        )
        setup = setup.with_read(
            ".......................T..               ", n_fwd=1, n_rev=0)
        setup.with_output_ref_calls(True)

        vcf_expect = driver.call().with_output_vcf()

        vcf_expect.has_reference_calls(ChromInterval(chrom, 0, 8))
        vcf_expect.has_record_for_variant(Variant(chrom, 8, "TA", "T"))
        vcf_expect.has_reference_calls(ChromInterval(chrom, 10, 23))
        vcf_expect.has_record_for_variant(Variant(chrom, 23, "A", "T"))
        vcf_expect.has_reference_calls(ChromInterval(chrom, 24, 41))
Exemple #13
0
    def test_calls_correct_ref_calls_with_cluster_of_variants(self):
        """Reference calls are emitted between each variant of a cluster.

        With reference-call output on, expects in turn: reference calls over
        0-8, TA>T at 8, reference calls over 10-16, G>GT at 16, reference
        calls over 17-23, A>T at 23 and reference calls over 24-41.
        """
        chrom = "1"
        read_patterns = (
            "       ..*.......*................        ",
            "  .......*.......*...........             ",
            "    .............T......T..........       ",
            "    .............T......T..............   ",
            ".................T......T..               ",
        )

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAATAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        for pattern in read_patterns:
            setup = setup.with_read(pattern, chrom=chrom)
        setup.with_output_ref_calls(True)

        call_result = driver.call()
        vcf_expect = call_result.with_output_vcf()

        vcf_expect.has_reference_calls_for_region(chrom, 0, 8)
        vcf_expect.has_record(chrom, 8, "TA", "T")
        vcf_expect.has_reference_calls_for_region(chrom, 10, 16)
        vcf_expect.has_record(chrom, 16, "G", "GT")
        vcf_expect.has_reference_calls_for_region(chrom, 17, 23)
        vcf_expect.has_record(chrom, 23, "A", "T")
        vcf_expect.has_reference_calls_for_region(chrom, 24, 41)
Exemple #14
0
    def test_calls_ref_calls_correctly_with_mnp_that_contains_snp(self):
        """Reference calls surround an MNP that overlaps a separate SNP.

        With reference-call output and MNP calls both on, expects reference
        calls over 0-17, CCCC>TTTT at 17, C>T at 18 and reference calls over
        21-41.
        """
        chrom = "1"
        read_patterns = (
            "       ..........TTTT.............       ",
            "  ...............TTTT........            ",
            "    ..............T................      ",
            "    ..............T....................  ",
            "..................T........              ",
        )

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAACGCACCCCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        for pattern in read_patterns:
            setup = setup.with_read(pattern, chrom=chrom)
        setup.with_output_ref_calls(True).with_allow_MNP_calls(True)

        call_result = driver.call()
        vcf_expect = call_result.with_output_vcf()

        vcf_expect.has_reference_calls_for_region(chrom, 0, 17)
        vcf_expect.has_record(chrom, 17, "CCCC", "TTTT")
        vcf_expect.has_record(chrom, 18, "C", "T")
        vcf_expect.has_reference_calls_for_region(chrom, 21, 41)
Exemple #15
0
    def test_depth_computation_all_reads_spanning_reference_with_insertion(
            self):
        """Read depth is 10 on both sides of an insertion and on the insertion.

        With reference-call output on and MNP calls off, the reference
        records at 0 and 16 must report read depth and minimum read depth of
        10, and the C>CT record at 15 must report read depth 10.
        """
        chrom = "1"
        sample_name = "bah"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAC*AAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        setup = setup.with_read(
            "................T.......................",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        setup.with_output_ref_calls(True).with_allow_MNP_calls(False)

        vcf_expect = driver.call().with_output_vcf()

        leading_ref = vcf_expect.has_record_for_variant(
            Variant(chrom, 0, "A", ref_alt))
        leading_ref.with_sample(sample_name) \
            .has_read_depth(10).has_min_read_depth(10)

        insertion = vcf_expect.has_record_for_variant(
            Variant(chrom, 15, "C", "CT"))
        insertion.with_sample(sample_name).has_read_depth(10)

        trailing_ref = vcf_expect.has_record_for_variant(
            Variant(chrom, 16, "A", ref_alt))
        trailing_ref.with_sample(sample_name) \
            .has_read_depth(10).has_min_read_depth(10)
Exemple #16
0
    def test_genotypes_mnp_correctly_with_supporting_reads(self):
        """An input MNP AAA>CAC at position 11 is genotyped as ``./1``.

        The candidate is supplied through a genotype VCF and two read
        patterns are added for the same sample. The output must contain a
        record for the MNP with genotype ``./1``.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 11, "AAA", "CAC")

        gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        gv_builder.with_record_from_variant(variant)
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        setup = setup.with_read(
            "...........C.C.......",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup = setup.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        vcf_expect = driver.call().with_output_vcf()
        record_expect = vcf_expect.has_record_for_variant(variant)
        record_expect.with_sample(sample).has_genotype("./1")
Exemple #17
0
    def test_phasing_for_isolated_snp_on_one_sample_only(self):
        """Both samples get phase information for a SNP seen in only one.

        Expects a single G>T record at 8. ``sample_1`` must be phased
        ``1|1`` and ``sample_2`` ``0|0``; both must report phase set id
        ``"8"`` and phase set quality ``MAX_PHRED``.
        """
        chrom = "1"
        sample_1 = "sample_1"
        sample_2 = "sample_2"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
            .with_read(
                "........T............",
                n_fwd=10, n_rev=10, sample_name=sample_1)
            .with_read(
                ".....................",
                n_fwd=10, n_rev=10, sample_name=sample_2)
        )
        svc_driver.with_output_phased_genotypes(True)

        vcf_expect = svc_driver.call().with_output_vcf()
        vcf_expect.record_count(1)

        snp = Variant(chrom, 8, "G", "T")
        record_expect = vcf_expect.has_record_for_variant(snp)

        # Each sample is checked against its own genotype; phase set id and
        # quality are the same for both.
        for sample, genotype in ((sample_1, "1|1"), (sample_2, "0|0")):
            sample_expect = record_expect.with_sample(sample)
            sample_expect.has_phased_genotype(genotype)
            sample_expect.has_phase_set_id("8")
            sample_expect.has_phase_set_quality(MAX_PHRED)
Exemple #18
0
    def test_should_report_unkown_value_for_allele_frequence_when_depth_is_zero(
            self):
        """A sample with zero depth reports no variant allelic frequency.

        Expects one G>T record at 10 across both samples. For the empty
        sample, read depth must be 0 and variant allelic frequency ``None``.
        """
        chrom = "1"
        # Only way to output depth zero for sample and variant is to have
        # another good sample
        good_sample = "good_sample"
        empty_sample = "empty_sample"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAACGTAGCTGTGCACCCCCAAA", chrom=chrom)
        setup = setup.with_read(
            "..........T............",
            n_fwd=10, n_rev=10, sample_name=good_sample)
        setup.with_read(
            ".......................",
            n_fwd=0, n_rev=0, sample_name=empty_sample)

        vcf = driver.call().with_output_vcf()
        vcf.with_samples([good_sample, empty_sample]).record_count(1)

        snp = Variant(chrom, 10, "G", "T")
        empty_expect = vcf.has_record_for_variant(snp) \
            .with_sample(empty_sample)
        empty_expect.has_read_depth(0).has_variant_allelic_frequency(None)
    def test_phase_alignment_for_two_snps_in_different_clusters_on_different_strands(
            self):
        """Two het SNPs in different clusters are phased on opposite strands.

        With phased output on and max cluster distance 10, expects two
        records sharing phase set ``7``: C>A at 7 as ``1|0`` and G>A at 29
        as ``0|1``.
        """
        sample_name = "a_sample"
        chrom = "1"

        # Parenthesised chain instead of backslash continuations: the
        # original ended the last link with a dangling ``\`` that only
        # parsed because a blank line happened to follow it.
        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "ACGCCCCCTGGGGGGGGGTGGGGGGGGGGGCAAAAAAAAAA", chrom=chrom)
            .with_read(
                ".......A.................................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".............................A...........",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_output_phased_genotypes(True)
            .with_max_cluster_distance(10)
        )

        vcf_expect = (
            svc_driver.call()
            .with_output_vcf()
            .record_count(2)
        )

        first_expect = vcf_expect.has_record_for_variants(
            Variant(chrom, 7, "C", "A"))
        first_expect.with_sample(sample_name) \
            .has_exact_phased_genotypes("1|0") \
            .has_phase_set_id("7")

        second_expect = vcf_expect.has_record_for_variants(
            Variant(chrom, 29, "G", "A"))
        second_expect.with_sample(sample_name) \
            .has_exact_phased_genotypes("0|1") \
            .has_phase_set_id("7")
Exemple #20
0
    def test_should_record_the_read_support_insertion_and_snp_on_same_strand(
            self):
        """Read support is recorded for an insertion next to a SNP.

        Expects two records for the single sample. The T>TC insertion at 9
        must report read depth 40, allelic read support ``(20, 20)`` and
        genotype ``0/1``.
        """
        chrom = "1"
        sample = "sample"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAACGTAGCTG*GCACCCCCAAA", chrom=chrom)
        setup = setup.with_read(
            "...........*...........",
            n_fwd=10, n_rev=10, sample_name=sample)
        setup.with_read(
            "..........CT...........",
            n_fwd=10, n_rev=10, sample_name=sample)

        vcf = driver.call().with_output_vcf()
        vcf.with_samples([sample]).record_count(2)

        insertion = Variant(chrom, 9, 'T', 'TC')
        sample_expect = vcf.has_record_for_variant(insertion) \
            .with_sample(sample)
        sample_expect.has_read_depth(40) \
            .has_allelic_read_support(20, 20) \
            .has_genotype("0/1")
    def test_calls_correct_reference_when_one_sample_has_snp_and_tother_has_indel(self):
        """Reference calls are right when the two samples differ in variants.

        Two read patterns are added for ``sample_1`` and three for
        ``sample_2``. With reference-call output on, the assertions cover
        reference calls over 0-15, C>T at 15, G>GT at 16 and reference calls
        over 17-41.
        """
        chrom = "1"
        sample_1 = "sample_1"
        sample_2 = "sample_2"

        # (read pattern, sample it is added for), in insertion order
        reads = (
            ("       ..........T................        ", sample_1),
            ("  ...............T...........             ", sample_1),
            ("    ...........T.*................        ", sample_2),
            ("    ...........T.*.....................   ", sample_2),
            ("...............T.*.........               ", sample_2),
        )

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAACGCACG*CCCCATAAAAAAATTTTTTTTTTT", chrom=chrom)
        for pattern, sample in reads:
            setup = setup.with_read(pattern, chrom=chrom, sample_name=sample)
        setup.with_output_ref_calls(True)

        call_result = driver.call()
        vcf_expect = call_result.with_output_vcf()

        # Has only 4 records which are:-
        vcf_expect.has_reference_calls_for_region(chrom, 0, 15)
        vcf_expect.has_record(chrom, 15, "C", "T")
        vcf_expect.has_record(chrom, 16, "G", "GT")
        vcf_expect.has_reference_calls_for_region(chrom, 17, 41)
Exemple #22
0
    def test_should_count_reads_that_do_not_overlap_the_calling_region(self):
        """INFO counts for the A>G record at 44 are both ``[40]``.

        MNP calls are on and calling is restricted to a region string built
        from the chromosome, 34 and 55. The A>G record at 44 must carry INFO
        fields ``DP`` and ``VC`` equal to ``[40]``.
        """
        chrom = "1"
        sample = "seed"

        driver = SVCDriver(self).with_allow_MNP_calls(True)
        setup = driver.with_ref_sequence(
            "GAAAAAAAAAAACGCACCCCCAAATTTTTTTTAA***********AAAATAAAAAACGCACCCCCAAATTTTTTTTAA",
            chrom=chrom,
        )
        setup = setup.with_read(
            "                                             ..........G......................",
            n_fwd=10, n_rev=10, sample_name=sample)
        setup = setup.with_read(
            "..................................AAAATAAAAAG                                 ",
            n_fwd=10, n_rev=10, sample_name=sample)
        setup.with_region_string("{}:{}-{}".format(chrom, 34, 55))

        vcf = driver.call().with_output_vcf()
        record_expect = vcf.has_record_for_variant(
            Variant(chrom, 44, 'A', 'G'))
        info_expect = record_expect.with_info()
        info_expect.with_field("DP", [40]).with_field("VC", [40])
Exemple #23
0
    def test_bad_reads_filter_not_applied_when_median_read_is_good(self):
        """The T>G SNP at 16 stays unfiltered with the BR filter enabled.

        Runs with the ``BR`` variant filter (window size 7, minimum bad reads
        score 20), expects exactly one record and no filters on the
        ``DEFAULT_CHROM`` T>G record at position 16.
        """
        svc = (
            SVCDriver(self)
            .with_var_filters("BR")
            .with_bad_reads_window_size(7)
            .with_min_bad_reads_score(20)
        )

        with_reference = svc.with_ref_sequence(
            # 1234567890123456789
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG")
        # Each read pattern is followed by a second positional string
        # (presumably per-base qualities -- confirm against SVCDriver.with_read).
        with_first_reads = with_reference.with_read(
            "................G...........................",
            "           1      1                         ",
            n_rev=10,
            n_fwd=10,
        )
        with_first_reads.with_read(
            "................G...........................",
            "         4444444 4444444                    ",
            n_rev=11,
            n_fwd=10,
        )

        vcf_expectation = svc.call().with_output_vcf()
        vcf_expectation.record_count(1)

        snp = Variant(DEFAULT_CHROM, 16, "T", "G")
        vcf_expectation.has_record_for_variant(snp).with_no_filters()
    def test_should_stop_mildly_allele_and_strand_biased_calls(self):
        """A mildly allele- and strand-biased SNP gets the ``AB+SB`` filter.

        Read counts per direction are derived from a base of 10 reads
        shifted by an allele bias of 5 and a strand bias of 4. Expects one
        T>G record at 16 whose filter set is ``{'AB+SB'}``.
        """
        chrom = 'chr1'
        reads = 10
        allele_bias = 5
        strand_bias = 4

        # First read pattern: counts raised by the allele bias.
        first_rev = reads + allele_bias + strand_bias
        first_fwd = reads + allele_bias - strand_bias
        # Second read pattern: counts lowered by the allele bias, with the
        # strand bias applied in the opposite direction.
        second_rev = reads - allele_bias - strand_bias
        second_fwd = reads - allele_bias + strand_bias

        svc = SVCDriver(self)
        setup = svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTCACAAACCCGTTACGTATGCATG", chrom=chrom)
        setup = setup.with_read(
            "............................................",
            n_rev=first_rev, n_fwd=first_fwd, chrom=chrom)
        setup.with_read(
            "................G...........................",
            n_rev=second_rev, n_fwd=second_fwd, chrom=chrom)

        vcf_expect = svc.call().with_output_vcf().record_count(1)
        record_expect = vcf_expect.has_record_for_variant(
            Variant(chrom, 16, 'T', 'G'))
        record_expect.with_filters({'AB+SB'})
    def test_should_call_basic_snps(self):
        """Four SNPs are called with the expected genotypes.

        With variant-call normalisation on, expects four records: A>T at 15
        (``1|0``), T>A at 25 (``1|1``), C>G at 41 (``1|0``) and T>A at 52
        (``0|1``).
        """
        chrom = "1"
        sample_name = "a_sample"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence(
                "GCCCCAGCCTCCCAAAGTGCATTGATTTTGTTGTTGTTGTGCTTATTTGCACTCCAGCCTGGCCTCTCCTTTCTTG",
                chrom=chrom)
            .with_read(
                "...............T.........A...............G..................................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".........................A..........................A.......................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_normalize_variant_calls(True)
        )

        vcf_expect = svc_driver.call().with_output_vcf()
        vcf_expect.record_count(4)

        # (position, ref, alt, expected genotype)
        expected_calls = (
            (15, "A", "T", "1|0"),
            (25, "T", "A", "1|1"),
            (41, "C", "G", "1|0"),
            (52, "T", "A", "0|1"),
        )
        for position, ref, alt, genotype in expected_calls:
            record_expect = vcf_expect.has_record(chrom, position, ref, alt)
            record_expect.with_sample(sample_name).has_genotype(genotype)
Exemple #26
0
    def test_min_depth_computation_with_mixed_depth_of_reads(self):
        """Depth and minimum depth are reported per reference block.

        With reference-call output on, the reference records starting at 0,
        10 and 30 must report read depth and minimum read depth of 10, 16
        and 6 respectively.
        """
        chrom = "1"
        sample_name = "bah"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        setup = setup.with_read(
            "..............................          ",
            n_rev=5, n_fwd=5, sample_name=sample_name)
        setup = setup.with_read(
            "          ..............................",
            n_rev=3, n_fwd=3, sample_name=sample_name)
        setup.with_output_ref_calls(True)

        expect = driver.call()

        # (record start position, expected depth == expected minimum depth)
        for position, depth in ((0, 10), (10, 16), (30, 6)):
            record_expect = expect.with_output_vcf().has_record_for_variant(
                Variant(chrom, position, "A", ref_alt))
            sample_expect = record_expect.with_sample(sample_name)
            sample_expect.has_read_depth(depth).has_min_read_depth(depth)
Exemple #27
0
    def test_doesnt_give_a_flying_damn_about_spurious_filters(self):
        """Genotyping succeeds when the input record has an odd filter value.

        The candidate A>C at 11 is written to the genotype VCF with a filter
        made of unusual characters. The call is expected to succeed and
        genotype the variant as ``1/1`` for the sample.
        """
        chrom = "22"
        sample = "bobs_your_uncle"
        variant = Variant(chrom, 11, "A", "C")

        gv_builder = VCFBuilder(join(self.work_dir, "genotype.vcf"))
        gv_builder.with_record_from_variant(
            variant, filters={"#$.:@$%$%^&**()7!"})
        gv_builder.build().index()

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "ACGCCCCCTGCAAAAAAAAAA", chrom=chrom, pos_from=0)
        setup = setup.with_read(
            "...........C.........",
            n_fwd=5, n_rev=5, chrom=chrom, sample_name=sample)
        setup.with_genotype_alleles(gv_builder.compressed_filename)

        expect = driver.call(expected_success=True)
        record_expect = expect.with_output_vcf() \
            .has_record_for_variant(variant)
        record_expect.with_sample(sample).has_genotype("1/1")
Exemple #28
0
    def test_min_depth_computation_with_mixed_depth_of_reads_when_no_chunking_occurs(
            self):
        """One reference record reports rounded depth and a minimum of 20.

        With reference-call output on, the reference record at 0 must report
        read depth ``round(20 + 4 * 35 / 40 + 2 * 35 / 40)`` and minimum
        read depth 20.
        """
        chrom = "1"
        sample_name = "bah"

        driver = SVCDriver(self)
        setup = driver.with_ref_sequence(
            "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", chrom=chrom)
        setup = setup.with_read(
            "........................................",
            n_rev=10, n_fwd=10, sample_name=sample_name)
        setup = setup.with_read(
            "...................................     ",
            n_rev=2, n_fwd=2, sample_name=sample_name)
        setup = setup.with_read(
            "   ...................................  ",
            n_rev=1, n_fwd=1, sample_name=sample_name)
        setup.with_output_ref_calls(True)

        expect = driver.call()

        record_expect = expect.with_output_vcf().has_record_for_variant(
            Variant(chrom, 0, "A", ref_alt))
        sample_expect = record_expect.with_sample(sample_name)
        # Expression kept verbatim from the original assertion.
        expected_depth = round(20 + 4 * 35 / 40 + 2 * 35 / 40)
        sample_expect.has_read_depth(expected_depth).has_min_read_depth(20)
Exemple #29
0
    def test_should_call_variants(self):
        """Variants are genotyped with three alleles when ploidy is 3.

        Expects four records: T>C at 6 (``0/1/1``), T>G at 16 (``0/0/1``),
        C>CATG at 21 (``1/1/1``) and TTAC>T at 28 (``1/1/1``).
        """
        chrom = 'chr1'
        sample_name = 'sample'
        read_patterns = (
            "......C.........G.....ATG.......***.........",
            "......C...............ATG.......***.........",
            "......................ATG.......***.........",
        )

        svc = SVCDriver(self).with_ploidy(3)

        setup = svc.with_ref_sequence(
            "AAAGCGTACAACCGGGTTAGTC***AACCCGTTACGTATGCATG", chrom=chrom)
        for pattern in read_patterns:
            setup = setup.with_read(
                pattern,
                n_rev=10, n_fwd=10, chrom=chrom, sample_name=sample_name)

        vcf = svc.call().with_output_vcf().record_count(4)

        # (position, ref, alt, expected genotype)
        expected_calls = (
            (6, 'T', 'C', '0/1/1'),
            (16, 'T', 'G', '0/0/1'),
            (21, 'C', 'CATG', '1/1/1'),
            (28, 'TTAC', 'T', '1/1/1'),
        )
        for position, ref, alt, genotype in expected_calls:
            record_expect = vcf.has_record_for_variant(
                Variant(chrom, position, ref, alt))
            record_expect.with_sample(sample_name).has_genotype(genotype)
    def test_phasing_for_isolated_heterozygous_variant(self):
        """A lone heterozygous SNP is phased into its own phase set.

        With phased output on, expects a single G>T record at 8 that is
        phased ``0|1`` for the sample, with phase set id ``"8"`` and phase
        set quality ``MAX_PHRED``.
        """
        chrom = "1"
        sample_name = "a_sample"

        svc_driver = (
            SVCDriver(self)
            .with_ref_sequence("ACGCCCCTGCAAAAAAAAAAT", chrom=chrom)
            .with_read(
                "........T............",
                n_fwd=10, n_rev=10, sample_name=sample_name)
            .with_read(
                ".....................",
                n_fwd=10, n_rev=10, sample_name=sample_name)
        )
        svc_driver.with_output_phased_genotypes(True)

        vcf_expect = svc_driver.call().with_output_vcf()
        vcf_expect.record_count(1)

        snp = Variant(chrom, 8, "G", "T")
        sample_expect = vcf_expect.has_record_for_variants(snp) \
            .with_sample(sample_name)
        sample_expect.has_phased_genotypes("0|1")
        sample_expect.has_phase_set_id("8")
        sample_expect.has_phase_set_quality(MAX_PHRED)