Exemple #1
0
 def test_validate_vcf_hc_pairs_raisesIfNoMissingVcfFiles(self):
     # A pair whose VCF slot is None (an hc file without a VCF partner) must
     # be rejected with a UsageError that names the orphaned hc file.
     # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
     # in Python 3.12); assertRaisesRegex is the replacement if Python 2
     # support is not needed -- confirm supported versions.
     complete_pair = (MockFileReader("A.vcf"), MockFileReader("A.hc"))
     hc_without_vcf = (None, MockFileReader("B.hc"))
     expected_message = (
         r"The VarScan high-confidence file \[B.hc\] has no matching VCF file.")
     self.assertRaisesRegexp(utils.UsageError,
                             expected_message,
                             self.caller._validate_vcf_hc_pairs,
                             [complete_pair, hc_without_vcf])
Exemple #2
0
    def test_claim_vcfAndFilterFileNameGiven(self):
        # With a custom hc_file_pattern ("foo.bar$"), claim() should wrap each
        # VarScan VCF together with the same-prefixed file matching that
        # pattern, and hand back the unrelated readme as unrecognized.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = [self.entab("chrom|position|ref|var"), record1]
        content2 = ["##foo", "##source=VarScan2", "#chrom", record1]
        reader1 = MockFileReader("patientA.indel.Somatic.foo.bar", content1)
        reader2 = MockFileReader("patientA.indel.vcf", content2)
        reader3 = MockFileReader("patientA.snp.Somatic.foo.bar", content1)
        reader4 = MockFileReader("patientA.snp.vcf", content2)
        reader5 = MockFileReader("patientA.readme", ["foo"])
        file_readers = [reader1, reader2, reader3, reader4, reader5]

        caller = varscan.Varscan()
        caller.hc_file_pattern = re.compile("foo.bar$")
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader5], unrecognized_readers)
        self.assertEqual(2, len(vcf_readers))

        # First claimed reader: the indel VCF with its indel hc file.
        self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)
        self.assertEqual(reader2.file_name,
                         vcf_readers[0]._vcf_reader.file_name)
        self.assertEqual(reader1.file_name,
                         vcf_readers[0]._som_hc_file_reader.file_name)

        # Second claimed reader: the snp VCF with its snp hc file.
        self.assertIsInstance(vcf_readers[1], varscan._VarscanVcfReader)
        self.assertEqual(reader4.file_name,
                         vcf_readers[1]._vcf_reader.file_name)
        self.assertEqual(reader3.file_name,
                         vcf_readers[1]._som_hc_file_reader.file_name)
Exemple #3
0
 def append_hc_files(readers,
                     file1="snp.somatic.hc.fpfilter.pass",
                     file2="indel.somatic.hc.fpfilter.pass",
                     content1=None,
                     content2=None):
     """Append two MockFileReaders (file1, then file2) to `readers` in place.

     A content argument that is None or empty is replaced by a new empty
     list before it is handed to MockFileReader.
     """
     first_content = content1 or []
     second_content = content2 or []
     for file_name, content in ((file1, first_content),
                                (file2, second_content)):
         readers.append(MockFileReader(file_name, content))
Exemple #4
0
    def test_claim_allSnpOrIndelOkay(self):
        # Inputs made up solely of Strelka indels VCFs (no snvs files at all)
        # are accepted: both readers are claimed and none is left over.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=strelka", "#chrom", record1]
        reader1 = MockFileReader("fileA.indels.vcf", content1)
        reader2 = MockFileReader("fileB.indels.vcf", content1)
        file_readers = [reader1, reader2]

        caller = strelka.Strelka()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(unrecognized_readers))
        self.assertEqual(2, len(vcf_readers))
Exemple #5
0
    def test_claim_ignoresOtherCallers(self):
        # The reader whose header says "##source=Foo" (and whose name ends in
        # .txt) must come back as unrecognized; only the VarScan2 VCF is
        # claimed.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=Foo", "#chrom", record1]
        content2 = ["##foo", "##source=VarScan2", "#chrom", record1]
        reader1 = MockFileReader("fileA.txt", content1)
        reader2 = MockFileReader("fileA.vcf", content2)
        file_readers = [reader1, reader2]

        caller = varscan.Varscan()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader1], unrecognized_readers)
        self.assertEqual(1, len(vcf_readers))
Exemple #6
0
    def test_claim_VCFHasNoMatchingHc(self):
        # patientB supplies both a VCF and a high-confidence file, patientA
        # supplies only a VCF; claim() is expected to raise a UsageError that
        # names the unpaired patientA VCF.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
        # in Python 3.12); assertRaisesRegex is the replacement if Python 2
        # support is not needed -- confirm supported versions.
        vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        hc_lines = [self.entab("chrom|position"), vcf_record]
        vcf_lines = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
        readers = [
            MockFileReader("patientA.vcf", vcf_lines),
            MockFileReader("patientB.vcf", vcf_lines),
            MockFileReader("patientB.snp.Somatic.hc.fpfilter.pass", hc_lines),
        ]
        expected_message = r"The VarScan VCF file \[patientA.vcf\] has no matching high-confidence file."

        varscan_caller = varscan.Varscan()
        self.assertRaisesRegexp(utils.UsageError,
                                expected_message,
                                varscan_caller.claim,
                                readers)
Exemple #7
0
    def test_claim_mismatchingSnpIndelFiles(self):
        # fileA has both snvs and indels VCFs but fileB has only indels, so
        # claim() should raise and an ERROR naming fileB should be logged.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=strelka", "#chrom", record1]
        reader1 = MockFileReader("fileA.snvs.vcf", content1)
        reader2 = MockFileReader("fileA.indels.vcf", content1)
        reader3 = MockFileReader("fileB.indels.vcf", content1)
        file_readers = [reader1, reader2, reader3]

        caller = strelka.Strelka()
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
        # in Python 3.12); assertRaisesRegex is the replacement if Python 2
        # support is not needed -- confirm supported versions.
        self.assertRaisesRegexp(
            utils.JQException,
            r"Some Strelka VCFs were missing either a snvs or indels file. Review inputs/command options and try again.",
            caller.claim, file_readers)
        actual_log_errors = test.utils.mock_logger.messages["ERROR"]
        expected_log_errors = ["Strelka VCF [fileB.indels] has no snvs file."]
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(expected_log_errors, actual_log_errors)
Exemple #8
0
    def test_claim_varscanVcfOnly(self):
        # VarScan VCFs supplied without any high-confidence files are still
        # claimed, each wrapped in a _VarscanVcfReader.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=VarScan2", "#chrom", record1]
        reader1 = MockFileReader("fileA.snp.vcf", content1)
        reader2 = MockFileReader("fileB.snp.vcf", content1)
        file_readers = [reader1, reader2]

        caller = varscan.Varscan()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(unrecognized_readers))
        self.assertEqual([], unrecognized_readers)
        self.assertEqual(2, len(vcf_readers))
        self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)

        self.assertEqual(reader1.file_name,
                         vcf_readers[0]._vcf_reader.file_name)
Exemple #9
0
    def test_claim(self):
        # Strelka claims the VCF whose header carries "##source=strelka" and
        # returns the MuTect-headed VCF as unrecognized.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=strelka", "#chrom", record1]
        content2 = ["##foo", "##MuTect", "#chrom", record1]
        reader1 = MockFileReader("fileA.vcf", content1)
        reader2 = MockFileReader("fileB.vcf", content2)
        file_readers = [reader1, reader2]

        caller = strelka.Strelka()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader2], unrecognized_readers)
        self.assertEqual(1, len(vcf_readers))
        self.assertIsInstance(vcf_readers[0], strelka._StrelkaVcfReader)
        self.assertEqual(reader1.file_name,
                         vcf_readers[0]._vcf_reader.file_name)
Exemple #10
0
    def test_claim_vcfAndInvalidFilterFile(self):
        # Two files are named like high-confidence files but start with a
        # chrom/pos/ref/alt header; claim() is expected to raise a
        # JQException reporting 2 files with an invalid or missing header.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
        # in Python 3.12); assertRaisesRegex is the replacement if Python 2
        # support is not needed -- confirm supported versions.
        vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        bad_hc_lines = [self.entab("chrom|pos|ref|alt"), vcf_record]
        vcf_lines = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
        readers = [
            MockFileReader("patientA.indel.Somatic.hc.fpfilter.pass",
                           bad_hc_lines),
            MockFileReader("patientA.indel.vcf", vcf_lines),
            MockFileReader("patientA.snp.Somatic.hc.fpfilter.pass",
                           bad_hc_lines),
            MockFileReader("patientA.snp.vcf", vcf_lines),
            MockFileReader("patientA.readme", ["foo"]),
        ]
        expected_message = r"The \[2\] input files \[.*\] match high-confidence file names, but the file header is invalid or missing. Review inputs and try again."

        varscan_caller = varscan.Varscan()
        self.assertRaisesRegexp(utils.JQException,
                                expected_message,
                                varscan_caller.claim,
                                readers)
Exemple #11
0
 def test_add_tag_values_highConfidenceDoesNotChangeFilter(self):
     # The record (chr1:42) appears in the high-confidence file, so its
     # filter value is expected to come back unchanged.
     record = vcf.VcfRecord("chr1", "42", "ref", "alt", vcf_filter="pass")
     input_reader = MockFileReader("foo.txt", [
         "chrom\tposition\tref\tvar", "chr1\t42\tref\tvar",
         "chr2\t50\tref\tvar"
     ])
     expected = "pass"
     actual = _HCTag(input_reader).add_tag_values(record).filter
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected, actual)
Exemple #12
0
 def test_add_tag_values_lowConfidencePassingReplacesFilter(self):
     # The record (chr1:30) is absent from the high-confidence file, so its
     # "pass" filter is expected to be replaced by the tag's _TAG_ID.
     record = vcf.VcfRecord("chr1", "30", "ref", "alt", vcf_filter="pass")
     input_reader = MockFileReader("foo.txt", [
         "chrom\tposition\tref\tvar", "chr1\t42\tref\tvar",
         "chr2\t50\tref\tvar"
     ])
     tag = _HCTag(input_reader)
     expected = tag._TAG_ID
     actual = tag.add_tag_values(record).filter
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected, actual)
Exemple #13
0
    def test_claim(self):
        # Mutect claims the VCF whose header carries "##MuTect=123" and
        # returns the strelka-sourced VCF as unrecognized.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=strelka", "#chrom", record1]
        content2 = [
            "##foo", "##MuTect=123", "##FORMAT=<ID=AF,...>", "#chrom", record1
        ]
        reader1 = MockFileReader("fileA.vcf", content1)
        reader2 = MockFileReader("fileB.vcf", content2)
        file_readers = [reader1, reader2]

        caller = mutect.Mutect()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader1], unrecognized_readers)
        self.assertEqual(1, len(vcf_readers))
        self.assertIsInstance(vcf_readers[0], mutect._MutectVcfReader)
        self.assertEqual(reader2.file_name,
                         vcf_readers[0]._vcf_reader.file_name)
Exemple #14
0
    def test_claim_multiplePatients(self):
        # Two patients (p2, p3), each with a VCF and a high-confidence file,
        # supplied in interleaved order: both VCFs should be claimed and each
        # resulting reader should carry an _HCTag.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=VarScan2", "#chrom", record1]
        content2 = ["chrom\tposition", "1\t23"]
        reader1 = MockFileReader("p2.fileA.vcf", content1)
        reader2 = MockFileReader("p2.fileA.Somatic.hc.fpfilter.pass", content2)
        reader3 = MockFileReader("p3.fileA.Somatic.hc.fpfilter.pass", content2)
        reader4 = MockFileReader("p3.fileA.vcf", content1)
        file_readers = [reader1, reader2, reader3, reader4]

        caller = varscan.Varscan()
        # The unrecognized readers are not relevant to this test.
        _, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(2, len(vcf_readers))
        self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)
        self.assertEqual("p2.fileA.vcf", vcf_readers[0]._vcf_reader.file_name)
        self.assertIn("_HCTag", self._get_tag_class_names(vcf_readers[0]))
        self.assertIn("_HCTag", self._get_tag_class_names(vcf_readers[1]))
        self.assertEqual(reader1.file_name,
                         vcf_readers[0]._vcf_reader.file_name)
Exemple #15
0
    def test_claim_filterRegexDoesNotMatch(self):
        # A custom hc_file_pattern is set but no input file name matches it;
        # claim() is expected to raise a UsageError quoting that pattern.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
        # in Python 3.12); assertRaisesRegex is the replacement if Python 2
        # support is not needed -- confirm supported versions.
        vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        vcf_lines = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
        readers = [MockFileReader("patientA.vcf", vcf_lines)]
        expected_message = r"The VarScan high-confidence filename regex \[foo\.bar\$\] didn't match any files in the input directory. The beginning of the high-confidence filename must exactly match a VCF filename up to the .vcf extension. Review inputs/command options and try again."

        varscan_caller = varscan.Varscan()
        varscan_caller.hc_file_pattern = re.compile("foo.bar$")
        self.assertRaisesRegexp(utils.UsageError,
                                expected_message,
                                varscan_caller.claim,
                                readers)
Exemple #16
0
    def test_claim_ignoresNonVcfExtensions(self):
        # A file with a MuTect metaheader but a .txt name must not be claimed.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##MuTect=123", "#chrom", record1]
        reader1 = MockFileReader("fileA.txt", content1)
        file_readers = [reader1]

        caller = mutect.Mutect()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader1], unrecognized_readers)
        self.assertEqual(0, len(vcf_readers))
Exemple #17
0
    def test_claim_ignores_non_vcf_files(self):
        # A file with a strelka source header but a .txt name must not be
        # claimed.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = ["##foo", "##source=strelka", "#chrom", record1]
        reader1 = MockFileReader("fileA.txt", content1)
        file_readers = [reader1]

        caller = strelka.Strelka()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(1, len(unrecognized_readers))
        self.assertEqual([reader1], unrecognized_readers)
        self.assertEqual(0, len(vcf_readers))
Exemple #18
0
    def test_claim_metaheaderRecognizesMutectV2x(self):
        # A VCF whose metaheaders include "##MuTect=2.1" and an FA FORMAT
        # field should be claimed by Mutect.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = [
            "##foo", "##MuTect=2.1", "##FORMAT=<ID=FA,...>", "#chrom", record1
        ]
        reader1 = MockFileReader("fileA.vcf", content1)
        file_readers = [reader1]

        caller = mutect.Mutect()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(unrecognized_readers))
        self.assertEqual(1, len(vcf_readers))
Exemple #19
0
    def test_claim_vcfExtensionCaseInsensitive(self):
        # The mixed-case extension ".VcF" should still be treated as a VCF
        # and claimed.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = [
            "##foo", "##MuTect=123", "##FORMAT=<ID=AF,...>", "#chrom", record1
        ]
        reader1 = MockFileReader("fileA.VcF", content1)
        file_readers = [reader1]

        caller = mutect.Mutect()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(unrecognized_readers))
        self.assertEqual(1, len(vcf_readers))
Exemple #20
0
    def test_claim_metaheaderRecognizesMutectV4x(self):
        # A VCF identified only by a GATKCommandLine metaheader with
        # ID=Mutect2 (no "##MuTect" line) should be claimed by Mutect.
        record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        content1 = [
            "##foo",
            '##GATKCommandLine=<ID=Mutect2,CommandLine="Mutect2 ...">',
            "##FORMAT=<ID=AF,...>", "#chrom", record1
        ]
        reader1 = MockFileReader("fileB.vcf", content1)
        file_readers = [reader1]

        caller = mutect.Mutect()
        unrecognized_readers, vcf_readers = caller.claim(file_readers)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(unrecognized_readers))
        self.assertEqual(1, len(vcf_readers))
Exemple #21
0
    def test_vcf_records_SomHcFileSNP(self):
        # Both records (chr1:21 and chr1:22) are listed in the somatic
        # high-confidence file, so each should still carry PASS in its filter
        # after passing through the _VarscanVcfReader.
        record1 = vcf.VcfRecord(chrom="chr1",
                                pos="21",
                                ref="A",
                                alt="G",
                                vcf_filter="PASS")
        record2 = vcf.VcfRecord(chrom="chr1",
                                pos="22",
                                ref="A",
                                alt="T",
                                vcf_filter="PASS")
        vcf_reader = MockVcfReader(records=[record1, record2])

        content1 = ["chrom\tposition", "chr1\t21", "chr1\t22"]
        somatic_hc_reader = MockFileReader("fileA.Somatic.hc.fpfilter.pass",
                                           content1)

        varscan_vcf_reader = varscan._VarscanVcfReader(vcf_reader,
                                                       somatic_hc_reader)
        # list() materializes the records directly; the original identity
        # comprehension did the same thing less plainly.
        vcf_records = list(varscan_vcf_reader.vcf_records())

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(2, len(vcf_records))
        self.assertIn("PASS", vcf_records[0].filter)
        self.assertIn("PASS", vcf_records[1].filter)
Exemple #22
0
 def test_validate_vcf_hc_pairs(self):
     # Every VCF has an hc partner, so validation should finish without
     # raising.
     complete_pairs = [
         (MockFileReader(stem + ".vcf"), MockFileReader(stem + ".hc"))
         for stem in ("A", "B")
     ]
     self.caller._validate_vcf_hc_pairs(complete_pairs)
     self.ok()
Exemple #23
0
 def test_validate_vcf_hc_pairs_okIfNoHcFiles(self):
     # No pair has an hc file at all (hc slot is None everywhere);
     # validation should finish without raising.
     vcf_only_pairs = [(MockFileReader(vcf_name), None)
                       for vcf_name in ("A.vcf", "B.vcf")]
     self.caller._validate_vcf_hc_pairs(vcf_only_pairs)
     self.ok()
Exemple #24
0
 def test_validate_filter_file_validFile(self):
     # A file whose first line is the "chrom<TAB>position" header is valid:
     # _validate_filter_file should return a reader with the same file name.
     file_reader = MockFileReader("p1.hc.fpfilter.pass",
                                  ["chrom\tposition"])
     caller = varscan.Varscan()
     valid_reader = caller._validate_filter_file(file_reader)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual("p1.hc.fpfilter.pass", valid_reader.file_name)
Exemple #25
0
 def test_validate_filter_file_invalidFile(self):
     # A header of "chrom<TAB>pos<TAB>ref" is not an acceptable
     # high-confidence header: _validate_filter_file should return None.
     file_reader = MockFileReader("p1.hc.fpfilter.pass",
                                  ["chrom\tpos\tref"])
     caller = varscan.Varscan()
     valid_reader = caller._validate_filter_file(file_reader)
     # assertIsNone checks identity with None and avoids the deprecated
     # assertEquals alias (removed from unittest in Python 3.12).
     self.assertIsNone(valid_reader)
Exemple #26
0
    def test_claim_vcfAnd6InvalidFilterFiles(self):
        # Six patients (A-F) each supply an hc-named file with a
        # chrom/pos/ref/alt header followed by a VarScan VCF. claim() is
        # expected to raise a JQException that reports 6 offending files and
        # notes that 1 file was omitted from the listing.
        # NOTE(review): assertRaisesRegexp is a deprecated unittest alias (gone
        # in Python 3.12); assertRaisesRegex is the replacement if Python 2
        # support is not needed -- confirm supported versions.
        vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
        bad_hc_lines = [self.entab("chrom|pos|ref|alt"), vcf_record]
        vcf_lines = ["##foo", "##source=VarScan2", "#chrom", vcf_record]

        # Same order as listing them by hand: hc file, then VCF, per patient.
        readers = []
        for patient in "ABCDEF":
            prefix = "patient" + patient + ".snp"
            readers.append(
                MockFileReader(prefix + ".Somatic.hc.fpfilter.pass",
                               bad_hc_lines))
            readers.append(MockFileReader(prefix + ".vcf", vcf_lines))

        expected_message = r"The \[6\] input files \[.*\(1 file\(s\) omitted\)\] match high-confidence file names, but the file header is invalid or missing. Review inputs and try again."

        varscan_caller = varscan.Varscan()
        self.assertRaisesRegexp(utils.JQException,
                                expected_message,
                                varscan_caller.claim,
                                readers)