예제 #1
0
 def test_cli(self):
     """Run ``add-oxog-filters`` through the CLI and check records and logs.

     Only the chr1:10 A>T record is expected to carry the ``oxog`` filter;
     every other record must carry ``PASS``.
     """
     oxo_vcf = get_test_data_path(
         "test_input_for_add_oxog_filters_from_maf.vcf.gz")
     vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             main(args=["add-oxog-filters", vcf_file, oxo_vcf, fn])
         vcf = pysam.VariantFile(fn)
         try:
             self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
             for record in vcf:
                 is_oxog_site = (record.contig == "chr1"
                                 and record.pos == 10
                                 and record.alleles == ("A", "T"))
                 expected = ["oxog"] if is_oxog_site else ["PASS"]
                 self.assertEqual(record.filter.keys(), expected)
         finally:
             # Release the reader even when an assertion above fails.
             vcf.close()
         serr = stderr.getvalue()
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Creating tabix index",
             serr)
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Processed 4 records - Tagged 1; Wrote 4",
             serr)
         self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
     finally:
         # The tool logs that it creates a tabix index; remove it as well.
         # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
         cleanup_files([fn, fn + ".tbi"])
    def test_cli(self):
        """Run ``dtoxog-maf-to-vcf`` through the CLI and check output and logs.

        The single written record must be chr1:10 A>T tagged ``oxog``.
        """
        ifa = get_test_data_path("test_oxog_ref.fa")
        imaf = get_test_data_path("test_oxog_annotated.maf")

        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name

        try:
            with captured_output() as (_, stderr):
                main(["dtoxog-maf-to-vcf", imaf, ifa, fn])

            written = 0
            vout = pysam.VariantFile(fn)
            try:
                for record in vout:
                    written += 1
                    self.assertEqual(record.chrom, "chr1")
                    self.assertEqual(record.pos, 10)
                    self.assertEqual(record.alleles, ("A", "T"))
                    self.assertEqual(record.filter.keys(), ["oxog"])
            finally:
                # Release the reader even when an assertion above fails.
                vout.close()
            # Without this the loop passes vacuously on an empty output;
            # the expected value mirrors the "Wrote 1" log line checked below.
            self.assertEqual(written, 1)

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Creating tabix index...",
                serr)
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Processed 2 records - Wrote 1",
                serr)
            self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
        finally:
            cleanup_files([fn, fn + ".tbi"])
예제 #3
0
    def test_cli(self):
        """Run ``create-dtoxog-maf`` through the CLI and check the MAF and logs.

        The output must start with the version line and the ``MAF_COLUMNS``
        header, followed by three rows equal to
        ``TestCreatedToxoGMaf.exp_maf``.
        """
        imets = get_test_data_path("test_oxog_metrics.txt")
        vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
        fa_file = get_test_data_path("test_oxog_ref.fa")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=[
                    "create-dtoxog-maf", vcf_file, fn, fa_file, imets, "32.0"
                ])
                with open(fn, "rt") as fh:
                    self.assertEqual(fh.readline(), "#version 2.4.1\n")
                    header = fh.readline().rstrip("\r\n").split("\t")
                    self.assertEqual(header, MAF_COLUMNS)
                    count = 0
                    for line in fh:
                        fields = line.rstrip("\r\n").split("\t")
                        dat = dict(zip(header, fields))
                        self.assertEqual(dat,
                                         TestCreatedToxoGMaf.exp_maf[count])
                        count += 1
                    self.assertEqual(count, 3)

            serr = stderr.getvalue()
            self.assertIn("Converts a SNP VCF to dToxoG MAF format.", serr)
            self.assertIn("Processed 3 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn("gdc_filtration_tools.create_dtoxog_maf",
                          log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
    def test_get_header(self):
        """Check the header produced by ``get_header`` for the pindel VCF.

        Expects exactly one ``TYPEOFSV`` and one ``forcedHet`` INFO record,
        no GENERIC ``center`` record, and samples ``NORMAL``, ``TUMOR``.
        """
        reader = pysam.VariantFile(get_test_data_path("pindel_test.vcf"))
        # Tracks which required INFO IDs have been seen so far.
        seen_info = {"TYPEOFSV": False, "forcedHet": False}
        center_present = False
        try:
            new_header = get_header(reader.header)
            for entry in new_header.records:
                if entry.type == "INFO":
                    info_id = entry.get("ID", "")
                    if info_id in seen_info:
                        # A second occurrence of the same ID is an error.
                        self.assertFalse(seen_info[info_id])
                        seen_info[info_id] = True
                elif entry.type == "GENERIC" and entry.key == "center":
                    center_present = True
            self.assertEqual(list(new_header.samples), ["NORMAL", "TUMOR"])
        finally:
            reader.close()

        self.assertTrue(seen_info["TYPEOFSV"])
        self.assertTrue(seen_info["forcedHet"])
        self.assertFalse(center_present)
    def test_filter_contigs(self):
        """Check ``filter_contigs`` output for ``.vcf`` and ``.vcf.gz`` targets.

        For each output suffix, exactly two records must remain and each must
        be on ``chr1`` or ``chr2``.
        """
        ivcf = get_test_data_path("filter_contigs.vcf")
        exp_chroms = ["chr1", "chr2"]

        # Both output flavours share the same expectations.
        for suffix in (".vcf", ".vcf.gz"):
            # mkstemp() would hand back an open descriptor that nothing here
            # closes; NamedTemporaryFile(delete=False) closes it and keeps
            # the path.
            with tempfile.NamedTemporaryFile(
                    suffix=suffix, delete=False) as tmp:
                fn = tmp.name
            try:
                with captured_output() as (_, _stderr):
                    filter_contigs(ivcf, fn)

                found = 0
                rdr = pysam.VariantFile(fn)
                try:
                    for record in rdr:
                        self.assertIn(record.chrom, exp_chroms)
                        found += 1
                finally:
                    rdr.close()
                self.assertEqual(found, 2)
            finally:
                # Remove the temp file even when an assertion fails.
                # NOTE(review): if a tabix index is also written for the
                # .vcf.gz output, it is not removed here - confirm.
                cleanup_files(fn)
 def test_build_new_record(self):
     """Check that ``build_new_record`` maps a MAF row onto a VCF record.

     The record built from the chr1:10 A>T row must have that position,
     contig and alleles, and carry the ``oxog`` filter.
     """
     ifa = get_test_data_path("test_oxog_ref.fa")
     header = generate_header(ifa, "oxog")
     maf = {
         "Chromosome": "chr1",
         "Start_position": "10",
         "Reference_Allele": "A",
         "Tumor_Seq_Allele1": "T",
     }
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf", delete=False) as tmp:
         fn = tmp.name
     vcf = None
     try:
         vcf = pysam.VariantFile(fn, mode="w", header=header)
         record = build_new_record(maf, vcf, "oxog")
         self.assertEqual(record.pos, 10)
         self.assertEqual(record.chrom, "chr1")
         self.assertEqual(record.alleles, ("A", "T"))
         self.assertEqual(record.filter.keys(), ["oxog"])
     finally:
         # vcf stays None when opening the writer itself failed.
         if vcf is not None:
             vcf.close()
         cleanup_files(fn)
예제 #7
0
    def test_cli(self):
        """Run ``filter-somatic-score`` through the CLI and check records/logs.

        Three records must be written, none at position 1, and the single
        ``ssc40``-tagged record must have a TUMOR ``SSC`` of 25.
        """
        ivcf = get_test_data_path("test_somatic_score.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            tagged = 0
            with captured_output() as (_, stderr):
                main(args=["filter-somatic-score", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    for record in vcf:
                        total += 1
                        self.assertNotEqual(record.pos, 1)
                        if "ssc40" in record.filter:
                            tagged += 1
                            self.assertEqual(
                                record.samples["TUMOR"]["SSC"], 25)
                finally:
                    # Release the reader even when an assertion fails.
                    vcf.close()
            self.assertEqual(total, 3)
            self.assertEqual(tagged, 1)
            serr = stderr.getvalue()
            self.assertIn(
                "Filters SomaticSniper VCF files based on Somatic Score.",
                serr)
            self.assertIn("Filter tag: ssc40", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn(
                "Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn("gdc_filtration_tools.filter_somatic_score",
                          log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
예제 #8
0
    def test_get_context(self):
        """Check ``get_context`` against the expected sequence per record."""
        vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
        fa_file = get_test_data_path("test_oxog_ref.fa")
        exp = ["CTTGGGGGGGG", "GGGGGGGGGGCGGGGGGGGGG", "GGGGGGGTTTACCGGGGGGGG"]

        fasta = pysam.FastaFile(fa_file)
        try:
            # Opened inside the try so the FASTA handle is released even if
            # the VCF cannot be opened.
            vcf = pysam.VariantFile(vcf_file)
            try:
                n = 0
                for rec in vcf:
                    res = get_context(rec, fasta)
                    self.assertEqual(res, exp[n])
                    n += 1
                # Without this the test passes when the VCF yields fewer
                # records than there are expected contexts.
                self.assertEqual(n, len(exp))
            finally:
                vcf.close()
        finally:
            fasta.close()
예제 #9
0
    def test_cli(self):
        """Run ``create-oxog-intervals`` through the CLI; check file and logs.

        The interval file must contain exactly ``chr1:1`` and ``chr2:1``.
        """
        ivcf = get_test_data_path("test.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            expected = ["chr1:1", "chr2:1"]
            with captured_output() as (_, stderr):
                main(args=["create-oxog-intervals", ivcf, fn])
                with open(fn, "rt") as fh:
                    found = [line.rstrip("\r\n") for line in fh]

            self.assertEqual(len(found), 2)
            self.assertEqual(found, expected)
            serr = stderr.getvalue()
            self.assertIn(
                "Extracts interval-file for Broad OxoG metrics from VCF.",
                serr)
            self.assertIn("Processed 2 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn("gdc_filtration_tools.create_oxog_intervals",
                          log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
예제 #10
0
 def test_filter_somatic_score_defaults(self):
     """Call ``filter_somatic_score`` with only input/output paths.

     Three records must be written, none at position 1, and the single
     ``ssc40``-tagged record must have a TUMOR ``SSC`` of 25.
     """
     ivcf = get_test_data_path("test_somatic_score.vcf")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         total = 0
         tagged = 0
         with captured_output() as (_, stderr):
             filter_somatic_score(ivcf, fn)
             vcf = pysam.VariantFile(fn)
             try:
                 for record in vcf:
                     total += 1
                     self.assertNotEqual(record.pos, 1)
                     if "ssc40" in record.filter:
                         tagged += 1
                         self.assertEqual(
                             record.samples["TUMOR"]["SSC"], 25)
             finally:
                 # Release the reader even when an assertion fails.
                 vcf.close()
         self.assertEqual(total, 3)
         self.assertEqual(tagged, 1)
         serr = stderr.getvalue()
         self.assertIn(
             "Filters SomaticSniper VCF files based on Somatic Score.",
             serr)
         self.assertIn("Filter tag: ssc40", serr)
         self.assertIn("Creating tabix index...", serr)
         self.assertIn(
             "Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)
     finally:
         # The tool logs that it creates a tabix index; remove it as well.
         # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
         cleanup_files([fn, fn + ".tbi"])
예제 #11
0
    def test_cli(self):
        """Run ``format-sanger-pindel-vcf`` through the CLI; check output/logs.

        Exactly two records (positions 10 and 20) are expected, each with
        TUMOR genotype ``(0, 1)`` and NORMAL genotype ``(0, 0)``.
        """
        ivcf = get_test_data_path("sanger_pindel_test.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(["format-sanger-pindel-vcf", ivcf, fn])

            vcf = pysam.VariantFile(fn)
            try:
                self.assertEqual(list(vcf.header.samples),
                                 ["NORMAL", "TUMOR"])
                for expected_pos in (10, 20):
                    rec = next(vcf)
                    self.assertEqual(rec.pos, expected_pos)
                    self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))
                    self.assertEqual(rec.samples["NORMAL"]["GT"], (0, 0))

                # No third record may follow.
                with self.assertRaises(StopIteration):
                    rec = next(vcf)
            finally:
                # Release the reader even when an assertion fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Creating tabix index...",
                serr)
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Processed 2 records.",
                serr)
            self.assertIn("gdc_filtration_tools.main", serr)
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
    def test_filter_nonstandard_variants(self):
        """Call ``filter_nonstandard_variants`` and check output and logs.

        Exactly two records must remain, on ``chr1`` then ``chr3``.
        """
        ivcf = get_test_data_path("test_nonstandard_variants.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                filter_nonstandard_variants(ivcf, fn)

            vcf = pysam.VariantFile(fn)
            try:
                record = next(vcf)
                self.assertEqual(record.chrom, "chr1")

                record = next(vcf)
                self.assertEqual(record.chrom, "chr3")

                # No third record may follow.
                with self.assertRaises(StopIteration):
                    record = next(vcf)
            finally:
                # Release the reader even when an assertion fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn("Drops non-ACTG loci from a VCF.", serr)
            self.assertIn("Removing chr2:1:A,R", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
    def test_cli(self):
        """Run ``filter-nonstandard-variants`` through the CLI; check results.

        Exactly two records must remain, on ``chr1`` then ``chr3``.
        """
        ivcf = get_test_data_path("test_nonstandard_variants.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=["filter-nonstandard-variants", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    record = next(vcf)
                    self.assertEqual(record.chrom, "chr1")

                    record = next(vcf)
                    self.assertEqual(record.chrom, "chr3")

                    # No third record may follow.
                    with self.assertRaises(StopIteration):
                        record = next(vcf)
                finally:
                    # Release the reader even when an assertion fails.
                    vcf.close()
            serr = stderr.getvalue()
            self.assertIn("Drops non-ACTG loci from a VCF.", serr)
            self.assertIn("Removing chr2:1:A,R", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn("gdc_filtration_tools.filter_nonstandard_variants",
                          log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
 def test_build_header(self):
     """Build a header from ``test.vcf`` plus ``FakeOpts`` and validate it."""
     obj = FakeOpts()
     ivcf = get_test_data_path("test.vcf")
     vcf = pysam.VariantFile(ivcf)
     try:
         # build_header receives the open reader followed by the option
         # values supplied by FakeOpts.to_build_header().
         opts = [vcf] + obj.to_build_header()
         res = build_header(*opts)
     finally:
         # Release the reader even if build_header raises.
         vcf.close()
     self.validate_header(obj, res)
 def test_format_gdc_vcf(self):
     """Run ``format_gdc_vcf`` with ``FakeOpts`` and validate the header."""
     ivcf = get_test_data_path("test.vcf")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     obj = FakeOpts(ivcf, fn)
     try:
         opts = attr.asdict(obj)
         format_gdc_vcf(**opts)
         vcf = pysam.VariantFile(fn)
         try:
             # Copy so the header can be inspected after the file is closed.
             hdr = vcf.header.copy()
         finally:
             vcf.close()
     finally:
         # Remove the temp file even when the tool or the reader fails.
         # NOTE(review): if a tabix index is also written for the .vcf.gz
         # output, it is not removed here - confirm.
         cleanup_files(fn)
     self.validate_header(obj, hdr)
예제 #16
0
    def test_generate_maf_record(self):
        """Check ``generate_maf_record`` output for every input VCF record.

        Each generated record must equal the matching entry of
        ``TestCreatedToxoGMaf.exp_maf``.
        """
        from gdc_filtration_tools.logger import Logger

        imets = get_test_data_path("test_oxog_metrics.txt")
        mets = load_oxog(imets)
        vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
        fa_file = get_test_data_path("test_oxog_ref.fa")
        fasta = pysam.FastaFile(fa_file)
        vcf = pysam.VariantFile(vcf_file)
        logger = Logger.get_logger("create_dtoxog_maf")
        count = 0
        try:
            for record in vcf:
                maf_record = generate_maf_record(record, fasta, mets, 32.0,
                                                 logger)
                self.assertEqual(maf_record,
                                 TestCreatedToxoGMaf.exp_maf[count])
                count += 1
            # count was tracked but never checked, so an empty VCF passed
            # silently. The create-dtoxog-maf tests expect three rows from
            # this same input.
            self.assertEqual(count, 3)
        finally:
            fasta.close()
            vcf.close()
예제 #17
0
 def test_create_dtoxog_maf(self):
     """Call ``create_dtoxog_maf`` directly and check the MAF and the log.

     The output must start with the version line and the ``MAF_COLUMNS``
     header, followed by three rows equal to
     ``TestCreatedToxoGMaf.exp_maf``.
     """
     imets = get_test_data_path("test_oxog_metrics.txt")
     vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
     fa_file = get_test_data_path("test_oxog_ref.fa")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             create_dtoxog_maf(vcf_file, fn, fa_file, imets, 32.0)
             with open(fn, "rt") as fh:
                 self.assertEqual(fh.readline(), "#version 2.4.1\n")
                 header = fh.readline().rstrip("\r\n").split("\t")
                 self.assertEqual(header, MAF_COLUMNS)
                 count = 0
                 for line in fh:
                     fields = line.rstrip("\r\n").split("\t")
                     dat = dict(zip(header, fields))
                     self.assertEqual(dat,
                                      TestCreatedToxoGMaf.exp_maf[count])
                     count += 1
                 # Guards against a vacuous pass on an empty body; the
                 # value matches the "Processed 3 records" log line below.
                 self.assertEqual(count, 3)
         serr = stderr.getvalue().split("\n")
         self.assertIn("Processed 3 records", serr[2])
     finally:
         cleanup_files(fn)
예제 #18
0
 def test_add_oxog_filters(self):
     """Call ``add_oxog_filters`` directly and check records and the log.

     Only the chr1:10 A>T record is expected to carry the ``oxog`` filter;
     every other record must carry ``PASS``.
     """
     oxo_vcf = get_test_data_path(
         "test_input_for_add_oxog_filters_from_maf.vcf.gz")
     vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf", delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             add_oxog_filters(vcf_file, oxo_vcf, fn)
         vcf = pysam.VariantFile(fn)
         try:
             self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
             for record in vcf:
                 is_oxog_site = (record.contig == "chr1"
                                 and record.pos == 10
                                 and record.alleles == ("A", "T"))
                 expected = ["oxog"] if is_oxog_site else ["PASS"]
                 self.assertEqual(record.filter.keys(), expected)
         finally:
             # Release the reader even when an assertion above fails.
             vcf.close()
         serr = stderr.getvalue()
         self.assertIn("Processed 4 records - Tagged 1; Wrote 4", serr)
     finally:
         cleanup_files(fn)
    def test_cli(self):
        """Run the CLI with ``FakeOpts`` arguments; validate header and logs."""
        ivcf = get_test_data_path("test.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        obj = FakeOpts(ivcf, fn)
        try:
            params = obj.to_cli_list()
            with captured_output() as (_, stderr):
                main(args=params)
            vcf = pysam.VariantFile(fn)
            try:
                # Copy so the header can be inspected after closing.
                hdr = vcf.header.copy()
            finally:
                vcf.close()
        finally:
            # Remove the temp file even when the CLI or the reader fails.
            # NOTE(review): if a tabix index is also written for the .vcf.gz
            # output, it is not removed here - confirm.
            cleanup_files(fn)
        self.validate_header(obj, hdr)

        # Drop blank lines so the first/last entries are real log lines.
        serr = [i for i in stderr.getvalue().split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.format_gdc_vcf", serr[0])
        self.assertIn("gdc_filtration_tools.main", serr[-1])
예제 #20
0
 def test_position_filter_dkfz(self):
     """Call ``position_filter_dkfz`` and check the output and log messages.

     Exactly one record, on ``chr2``, must remain.
     """
     ivcf = get_test_data_path("test_dfkz.vcf")
     # mkstemp() would hand back an open descriptor that nothing here
     # closes; NamedTemporaryFile(delete=False) closes it and keeps the path.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         total = 0
         with captured_output() as (_, stderr):
             position_filter_dkfz(ivcf, fn)
             vcf = pysam.VariantFile(fn)
             try:
                 for record in vcf:
                     total += 1
                     self.assertEqual(record.chrom, "chr2")
             finally:
                 # Release the reader even when an assertion fails.
                 vcf.close()
         self.assertEqual(total, 1)
         serr = stderr.getvalue()
         self.assertIn("Position Filter for DKFZ.", serr)
         self.assertIn("Creating tabix index...", serr)
         self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)
     finally:
         # The tool logs that it creates a tabix index; remove it as well.
         # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
         cleanup_files([fn, fn + ".tbi"])
예제 #21
0
    def test_create_oxog_intervals(self):
        """Call ``create_oxog_intervals`` and check the file and log messages.

        The interval file must contain exactly ``chr1:1`` and ``chr2:1``.
        """
        ivcf = get_test_data_path("test.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            expected = ["chr1:1", "chr2:1"]
            with captured_output() as (_, stderr):
                create_oxog_intervals(ivcf, fn)
                with open(fn, "rt") as fh:
                    found = [line.rstrip("\r\n") for line in fh]

            self.assertEqual(len(found), 2)
            self.assertEqual(found, expected)
            serr = stderr.getvalue()
            self.assertIn(
                "Extracts interval-file for Broad OxoG metrics from VCF.",
                serr)
            self.assertIn("Processed 2 records", serr)
        finally:
            cleanup_files(fn)
예제 #22
0
    def test_cli(self):
        """Run ``position-filter-dkfz`` through the CLI; check output and logs.

        Exactly one record, on ``chr2``, must remain.
        """
        ivcf = get_test_data_path("test_dfkz.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            with captured_output() as (_, stderr):
                main(args=["position-filter-dkfz", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    for record in vcf:
                        total += 1
                        self.assertEqual(record.chrom, "chr2")
                finally:
                    # Release the reader even when an assertion fails.
                    vcf.close()
            self.assertEqual(total, 1)
            serr = stderr.getvalue()
            self.assertIn("Position Filter for DKFZ.", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn("gdc_filtration_tools.position_filter_dkfz",
                          log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
    def test_format_pindel_vcf(self):
        """Call ``format_pindel_vcf`` and check records and log messages.

        Two ``INS`` records are expected, each with TUMOR genotype
        ``(0, 1)``. Only the second has ``forcedHet`` set, and looking up
        ``SVTYPE`` on the first must raise ``ValueError``.
        """
        ivcf = get_test_data_path("pindel_test.vcf")
        # mkstemp() would hand back an open descriptor that nothing here
        # closes; NamedTemporaryFile(delete=False) closes it and keeps the
        # path.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz",
                                         delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                format_pindel_vcf(ivcf, fn)

            vcf = pysam.VariantFile(fn)
            try:
                self.assertEqual(list(vcf.header.samples),
                                 ["NORMAL", "TUMOR"])
                rec = next(vcf)
                self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
                with self.assertRaises(ValueError):
                    rec.info.get("SVTYPE")
                self.assertFalse(rec.info.get("forcedHet"))
                self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

                rec = next(vcf)
                self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
                self.assertTrue(rec.info.get("forcedHet"))
                self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

                # No third record may follow.
                with self.assertRaises(StopIteration):
                    rec = next(vcf)
            finally:
                # Release the reader even when an assertion fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.format_pindel_vcf] - Creating tabix index...",
                serr)
            self.assertIn(
                "[gdc_filtration_tools.format_pindel_vcf] - Processed 2 records.",
                serr)
        finally:
            # The tool logs that it creates a tabix index; remove it as well.
            # NOTE(review): assumes the index lands at fn + ".tbi" - confirm.
            cleanup_files([fn, fn + ".tbi"])
 def test_generate_header(self):
     """Check filters and contigs of the header built by ``generate_header``.

     Expects filters ``PASS`` and ``TEST`` and a single contig ``chr1`` of
     length 100.
     """
     hdr = generate_header(get_test_data_path("test_oxog_ref.fa"), "TEST")

     filter_ids = hdr.filters.keys()
     self.assertEqual(filter_ids, ["PASS", "TEST"])

     contig_names = list(hdr.contigs)
     self.assertEqual(contig_names, ["chr1"])

     chr1 = hdr.contigs.get("chr1")
     self.assertEqual(chr1.length, 100)
예제 #25
0
 def test_load_oxog(self):
     """Check ``load_oxog`` output against ``TestCreatedToxoGMaf.exp_oxog``."""
     metrics_path = get_test_data_path("test_oxog_metrics.txt")
     self.assertEqual(load_oxog(metrics_path), TestCreatedToxoGMaf.exp_oxog)