def test_validate_vcf_hc_pairs_raisesIfNoMissingVcfFiles(self):
    """Check that a high-confidence file without a VCF is rejected.

    The second pair holds ``None`` in the VCF slot next to a ``B.hc``
    reader; ``_validate_vcf_hc_pairs`` is expected to raise
    ``utils.UsageError`` with a message naming ``B.hc``.
    """
    matched_pair = (MockFileReader("A.vcf"), MockFileReader("A.hc"))
    hc_without_vcf = (None, MockFileReader("B.hc"))
    expected_error = r"The VarScan high-confidence file \[B.hc\] has no matching VCF file."

    self.assertRaisesRegexp(utils.UsageError,
                            expected_error,
                            self.caller._validate_vcf_hc_pairs,
                            [matched_pair, hc_without_vcf])
def test_claim_vcfAndFilterFileNameGiven(self):
    """Claim VCFs paired via a caller-supplied high-confidence pattern.

    ``hc_file_pattern`` is overridden with ``foo.bar$``; the two files
    ending in ``foo.bar`` are expected to be attached to the indel and snp
    VCF readers respectively, while the readme file stays unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = [self.entab("chrom|position|ref|var"), record1]
    content2 = ["##foo", "##source=VarScan2", "#chrom", record1]
    reader1 = MockFileReader("patientA.indel.Somatic.foo.bar", content1)
    reader2 = MockFileReader("patientA.indel.vcf", content2)
    reader3 = MockFileReader("patientA.snp.Somatic.foo.bar", content1)
    reader4 = MockFileReader("patientA.snp.vcf", content2)
    reader5 = MockFileReader("patientA.readme", ["foo"])
    file_readers = [reader1, reader2, reader3, reader4, reader5]

    caller = varscan.Varscan()
    caller.hc_file_pattern = re.compile("foo.bar$")
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader5], unrecognized_readers)
    self.assertEqual(2, len(vcf_readers))

    # First claimed reader: indel VCF with its matching filter file.
    self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)
    self.assertEqual(reader2.file_name,
                     vcf_readers[0]._vcf_reader.file_name)
    self.assertEqual(reader1.file_name,
                     vcf_readers[0]._som_hc_file_reader.file_name)

    # Second claimed reader: snp VCF with its matching filter file.
    self.assertIsInstance(vcf_readers[1], varscan._VarscanVcfReader)
    self.assertEqual(reader4.file_name,
                     vcf_readers[1]._vcf_reader.file_name)
    self.assertEqual(reader3.file_name,
                     vcf_readers[1]._som_hc_file_reader.file_name)
def append_hc_files(readers,
                    file1="snp.somatic.hc.fpfilter.pass",
                    file2="indel.somatic.hc.fpfilter.pass",
                    content1=None,
                    content2=None):
    """Append two ``MockFileReader`` objects to ``readers`` in place.

    Args:
        readers: list that receives the new readers (mutated).
        file1: file name for the first appended reader.
        file2: file name for the second appended reader.
        content1: content for the first reader; any falsy value is
            replaced by a new empty list.
        content2: content for the second reader; any falsy value is
            replaced by a new empty list.
    """
    named_contents = ((file1, content1 or []), (file2, content2 or []))
    for file_name, content in named_contents:
        readers.append(MockFileReader(file_name, content))
def test_claim_allSnpOrIndelOkay(self):
    """Two ``.indels.vcf`` files with no snvs files are both claimed.

    Both inputs carry a ``##source=strelka`` metaheader; the Strelka
    caller is expected to claim both and leave nothing unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=strelka", "#chrom", record1]
    reader1 = MockFileReader("fileA.indels.vcf", content1)
    reader2 = MockFileReader("fileB.indels.vcf", content1)
    file_readers = [reader1, reader2]

    caller = strelka.Strelka()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(0, len(unrecognized_readers))
    self.assertEqual(2, len(vcf_readers))
def test_claim_ignoresOtherCallers(self):
    """Only the VarScan2-sourced VCF is claimed by the VarScan caller.

    ``fileA.txt`` carries ``##source=Foo`` and is expected to come back
    unrecognized; ``fileA.vcf`` carries ``##source=VarScan2`` and is
    expected to be the single claimed reader.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=Foo", "#chrom", record1]
    content2 = ["##foo", "##source=VarScan2", "#chrom", record1]
    reader1 = MockFileReader("fileA.txt", content1)
    reader2 = MockFileReader("fileA.vcf", content2)
    file_readers = [reader1, reader2]

    caller = varscan.Varscan()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader1], unrecognized_readers)
    self.assertEqual(1, len(vcf_readers))
def test_claim_VCFHasNoMatchingHc(self):
    """Claiming fails when one VCF lacks a high-confidence file.

    ``patientB.vcf`` is given alongside a ``patientB`` high-confidence
    file, but ``patientA.vcf`` is not; ``claim`` is expected to raise
    ``utils.UsageError`` naming ``patientA.vcf``.
    """
    vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    hc_content = [self.entab("chrom|position"), vcf_record]
    vcf_content = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
    inputs = [
        MockFileReader("patientA.vcf", vcf_content),
        MockFileReader("patientB.vcf", vcf_content),
        MockFileReader("patientB.snp.Somatic.hc.fpfilter.pass", hc_content),
    ]
    varscan_caller = varscan.Varscan()
    expected_error = r"The VarScan VCF file \[patientA.vcf\] has no matching high-confidence file."

    self.assertRaisesRegexp(utils.UsageError,
                            expected_error,
                            varscan_caller.claim,
                            inputs)
def test_claim_mismatchingSnpIndelFiles(self):
    """Claiming fails when an indels VCF has no snvs counterpart.

    ``fileA`` has both snvs and indels files but ``fileB`` has only an
    indels file. ``claim`` is expected to raise ``utils.JQException`` and
    the mock logger is expected to hold exactly one ERROR message naming
    ``fileB.indels``.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=strelka", "#chrom", record1]
    reader1 = MockFileReader("fileA.snvs.vcf", content1)
    reader2 = MockFileReader("fileA.indels.vcf", content1)
    reader3 = MockFileReader("fileB.indels.vcf", content1)
    file_readers = [reader1, reader2, reader3]

    caller = strelka.Strelka()
    self.assertRaisesRegexp(
        utils.JQException,
        r"Some Strelka VCFs were missing either a snvs or indels file. Review inputs/command options and try again.",
        caller.claim,
        file_readers)

    actual_log_errors = test.utils.mock_logger.messages["ERROR"]
    expected_log_errors = ["Strelka VCF [fileB.indels] has no snvs file."]
    self.assertEqual(expected_log_errors, actual_log_errors)
def test_claim_varscanVcfOnly(self):
    """VarScan VCFs are claimed when no high-confidence files are given.

    Both inputs carry ``##source=VarScan2``; both are expected to be
    claimed, with the first claimed reader wrapping ``fileA.snp.vcf``.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=VarScan2", "#chrom", record1]
    reader1 = MockFileReader("fileA.snp.vcf", content1)
    reader2 = MockFileReader("fileB.snp.vcf", content1)
    file_readers = [reader1, reader2]

    caller = varscan.Varscan()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(0, len(unrecognized_readers))
    self.assertEqual([], unrecognized_readers)
    self.assertEqual(2, len(vcf_readers))
    self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)
    self.assertEqual(reader1.file_name,
                     vcf_readers[0]._vcf_reader.file_name)
def test_claim(self):
    """The Strelka caller claims only the ``##source=strelka`` VCF.

    ``fileA.vcf`` carries ``##source=strelka`` and is expected to be
    wrapped in a ``_StrelkaVcfReader``; ``fileB.vcf`` carries ``##MuTect``
    and is expected to come back unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=strelka", "#chrom", record1]
    content2 = ["##foo", "##MuTect", "#chrom", record1]
    reader1 = MockFileReader("fileA.vcf", content1)
    reader2 = MockFileReader("fileB.vcf", content2)
    file_readers = [reader1, reader2]

    caller = strelka.Strelka()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader2], unrecognized_readers)
    self.assertEqual(1, len(vcf_readers))
    self.assertIsInstance(vcf_readers[0], strelka._StrelkaVcfReader)
    self.assertEqual(reader1.file_name,
                     vcf_readers[0]._vcf_reader.file_name)
def test_claim_vcfAndInvalidFilterFile(self):
    """Claiming fails when filter files have an unexpected header.

    Two files use the default high-confidence file names but their
    header is ``chrom|pos|ref|alt`` (tab-separated via ``entab``);
    ``claim`` is expected to raise ``utils.JQException`` reporting 2 such
    files.
    """
    vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    bad_hc_content = [self.entab("chrom|pos|ref|alt"), vcf_record]
    vcf_content = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
    inputs = [
        MockFileReader("patientA.indel.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientA.indel.vcf", vcf_content),
        MockFileReader("patientA.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientA.snp.vcf", vcf_content),
        MockFileReader("patientA.readme", ["foo"]),
    ]
    varscan_caller = varscan.Varscan()
    expected_error = r"The \[2\] input files \[.*\] match high-confidence file names, but the file header is invalid or missing. Review inputs and try again."

    self.assertRaisesRegexp(utils.JQException,
                            expected_error,
                            varscan_caller.claim,
                            inputs)
def test_add_tag_values_highConfidenceDoesNotChangeFilter(self):
    """A record listed in the reader content keeps its filter value.

    The record sits at chr1:42, which appears in the reader's rows; after
    ``add_tag_values`` the filter is expected to still be ``"pass"``.
    """
    record = vcf.VcfRecord("chr1", "42", "ref", "alt", vcf_filter="pass")
    input_reader = MockFileReader("foo.txt", [
        "chrom\tposition\tref\tvar",
        "chr1\t42\tref\tvar",
        "chr2\t50\tref\tvar",
    ])

    expected = "pass"
    actual = _HCTag(input_reader).add_tag_values(record).filter

    self.assertEqual(expected, actual)
def test_add_tag_values_lowConfidencePassingReplacesFilter(self):
    """A passing record absent from the reader content gets the tag ID.

    The record sits at chr1:30, which does not appear in the reader's
    rows; after ``add_tag_values`` the filter is expected to equal the
    tag's ``_TAG_ID`` instead of the original ``"pass"``.
    """
    record = vcf.VcfRecord("chr1", "30", "ref", "alt", vcf_filter="pass")
    input_reader = MockFileReader("foo.txt", [
        "chrom\tposition\tref\tvar",
        "chr1\t42\tref\tvar",
        "chr2\t50\tref\tvar",
    ])
    tag = _HCTag(input_reader)

    expected = tag._TAG_ID
    actual = tag.add_tag_values(record).filter

    self.assertEqual(expected, actual)
def test_claim(self):
    """The MuTect caller claims only the VCF with a MuTect metaheader.

    ``fileB.vcf`` carries ``##MuTect=123`` and is expected to be wrapped
    in a ``_MutectVcfReader``; ``fileA.vcf`` carries ``##source=strelka``
    and is expected to come back unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=strelka", "#chrom", record1]
    content2 = [
        "##foo",
        "##MuTect=123",
        "##FORMAT=<ID=AF,...>",
        "#chrom",
        record1,
    ]
    reader1 = MockFileReader("fileA.vcf", content1)
    reader2 = MockFileReader("fileB.vcf", content2)
    file_readers = [reader1, reader2]

    caller = mutect.Mutect()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader1], unrecognized_readers)
    self.assertEqual(1, len(vcf_readers))
    self.assertIsInstance(vcf_readers[0], mutect._MutectVcfReader)
    self.assertEqual(reader2.file_name,
                     vcf_readers[0]._vcf_reader.file_name)
def test_claim_multiplePatients(self):
    """VCF and high-confidence files for two patients are both claimed.

    Inputs are a VCF plus high-confidence file for each of ``p2`` and
    ``p3``. Two ``_VarscanVcfReader`` results are expected, the first
    wrapping ``p2.fileA.vcf``, and both are expected to expose an
    ``_HCTag`` among their tag class names.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=VarScan2", "#chrom", record1]
    content2 = ["chrom\tposition", "1\t23"]
    reader1 = MockFileReader("p2.fileA.vcf", content1)
    reader2 = MockFileReader("p2.fileA.Somatic.hc.fpfilter.pass", content2)
    reader3 = MockFileReader("p3.fileA.Somatic.hc.fpfilter.pass", content2)
    reader4 = MockFileReader("p3.fileA.vcf", content1)
    file_readers = [reader1, reader2, reader3, reader4]

    caller = varscan.Varscan()
    # The unrecognized readers are not inspected by this test.
    _, vcf_readers = caller.claim(file_readers)

    self.assertEqual(2, len(vcf_readers))
    self.assertIsInstance(vcf_readers[0], varscan._VarscanVcfReader)
    self.assertEqual("p2.fileA.vcf", vcf_readers[0]._vcf_reader.file_name)
    self.assertIn("_HCTag", self._get_tag_class_names(vcf_readers[0]))
    self.assertIn("_HCTag", self._get_tag_class_names(vcf_readers[1]))
    self.assertEqual(reader1.file_name,
                     vcf_readers[0]._vcf_reader.file_name)
def test_claim_filterRegexDoesNotMatch(self):
    """Claiming fails when the custom pattern matches no input file.

    ``hc_file_pattern`` is overridden with ``foo.bar$`` while the only
    input is ``patientA.vcf``; ``claim`` is expected to raise
    ``utils.UsageError`` quoting the pattern.
    """
    vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    vcf_content = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
    inputs = [MockFileReader("patientA.vcf", vcf_content)]

    varscan_caller = varscan.Varscan()
    varscan_caller.hc_file_pattern = re.compile("foo.bar$")
    expected_error = r"The VarScan high-confidence filename regex \[foo\.bar\$\] didn't match any files in the input directory. The beginning of the high-confidence filename must exactly match a VCF filename up to the .vcf extension. Review inputs/command options and try again."

    self.assertRaisesRegexp(utils.UsageError,
                            expected_error,
                            varscan_caller.claim,
                            inputs)
def test_claim_ignoresNonVcfExtensions(self):
    """A ``.txt`` file is not claimed by the MuTect caller.

    The input carries a ``##MuTect=123`` metaheader but is named
    ``fileA.txt``; it is expected to come back unrecognized with no
    claimed readers.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##MuTect=123", "#chrom", record1]
    reader1 = MockFileReader("fileA.txt", content1)
    file_readers = [reader1]

    caller = mutect.Mutect()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader1], unrecognized_readers)
    self.assertEqual(0, len(vcf_readers))
def test_claim_ignores_non_vcf_files(self):
    """A ``.txt`` file is not claimed by the Strelka caller.

    The input carries a ``##source=strelka`` metaheader but is named
    ``fileA.txt``; it is expected to come back unrecognized with no
    claimed readers.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = ["##foo", "##source=strelka", "#chrom", record1]
    reader1 = MockFileReader("fileA.txt", content1)
    file_readers = [reader1]

    caller = strelka.Strelka()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(1, len(unrecognized_readers))
    self.assertEqual([reader1], unrecognized_readers)
    self.assertEqual(0, len(vcf_readers))
def test_claim_metaheaderRecognizesMutectV2x(self):
    """A VCF with a ``##MuTect=2.1`` metaheader is claimed.

    The input also declares a ``FA`` FORMAT field; the MuTect caller is
    expected to claim it and leave nothing unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = [
        "##foo",
        "##MuTect=2.1",
        "##FORMAT=<ID=FA,...>",
        "#chrom",
        record1,
    ]
    reader1 = MockFileReader("fileA.vcf", content1)
    file_readers = [reader1]

    caller = mutect.Mutect()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(0, len(unrecognized_readers))
    self.assertEqual(1, len(vcf_readers))
def test_claim_vcfExtensionCaseInsensitive(self):
    """A mixed-case ``.VcF`` extension does not prevent claiming.

    The input ``fileA.VcF`` carries a ``##MuTect=123`` metaheader; the
    MuTect caller is expected to claim it and leave nothing unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = [
        "##foo",
        "##MuTect=123",
        "##FORMAT=<ID=AF,...>",
        "#chrom",
        record1,
    ]
    reader1 = MockFileReader("fileA.VcF", content1)
    file_readers = [reader1]

    caller = mutect.Mutect()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(0, len(unrecognized_readers))
    self.assertEqual(1, len(vcf_readers))
def test_claim_metaheaderRecognizesMutectV4x(self):
    """A VCF with a ``##GATKCommandLine`` Mutect2 metaheader is claimed.

    The input has no ``##MuTect=`` line, only a ``##GATKCommandLine``
    entry whose ID is ``Mutect2``; the MuTect caller is expected to claim
    it and leave nothing unrecognized.
    """
    record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    content1 = [
        "##foo",
        '##GATKCommandLine=<ID=Mutect2,CommandLine="Mutect2 ...">',
        "##FORMAT=<ID=AF,...>",
        "#chrom",
        record1,
    ]
    reader1 = MockFileReader("fileB.vcf", content1)
    file_readers = [reader1]

    caller = mutect.Mutect()
    unrecognized_readers, vcf_readers = caller.claim(file_readers)

    self.assertEqual(0, len(unrecognized_readers))
    self.assertEqual(1, len(vcf_readers))
def test_vcf_records_SomHcFileSNP(self):
    """Records listed in the high-confidence file keep a PASS filter.

    Both VCF records (chr1:21 and chr1:22) appear in the high-confidence
    reader's rows; the records yielded by ``vcf_records()`` are expected
    to still contain ``"PASS"`` in their filter.
    """
    record1 = vcf.VcfRecord(chrom="chr1", pos="21", ref="A", alt="G",
                            vcf_filter="PASS")
    record2 = vcf.VcfRecord(chrom="chr1", pos="22", ref="A", alt="T",
                            vcf_filter="PASS")
    vcf_reader = MockVcfReader(records=[record1, record2])
    content1 = ["chrom\tposition", "chr1\t21", "chr1\t22"]
    somatic_hc_reader = MockFileReader("fileA.Somatic.hc.fpfilter.pass",
                                       content1)
    varscan_vcf_reader = varscan._VarscanVcfReader(vcf_reader,
                                                   somatic_hc_reader)

    # Materialize the iterable so it can be measured and indexed.
    vcf_records = list(varscan_vcf_reader.vcf_records())

    self.assertEqual(2, len(vcf_records))
    self.assertIn("PASS", vcf_records[0].filter)
    self.assertIn("PASS", vcf_records[1].filter)
def test_validate_vcf_hc_pairs(self):
    """Fully matched VCF/high-confidence pairs validate without error.

    Each pair holds both a VCF reader and a high-confidence reader; the
    test passes if ``_validate_vcf_hc_pairs`` returns without raising.
    """
    pair_a = (MockFileReader("A.vcf"), MockFileReader("A.hc"))
    pair_b = (MockFileReader("B.vcf"), MockFileReader("B.hc"))

    self.caller._validate_vcf_hc_pairs([pair_a, pair_b])

    self.ok()
def test_validate_vcf_hc_pairs_okIfNoHcFiles(self):
    """Pairs with no high-confidence file at all validate without error.

    Every pair holds a VCF reader and ``None`` in the high-confidence
    slot; the test passes if ``_validate_vcf_hc_pairs`` returns without
    raising.
    """
    vcf_only_pairs = [(MockFileReader(vcf_name), None)
                      for vcf_name in ("A.vcf", "B.vcf")]

    self.caller._validate_vcf_hc_pairs(vcf_only_pairs)

    self.ok()
def test_validate_filter_file_validFile(self):
    """A filter file with a ``chrom``/``position`` header is accepted.

    ``_validate_filter_file`` is expected to return a reader whose
    ``file_name`` matches the input reader's.
    """
    file_reader = MockFileReader("p1.hc.fpfilter.pass", ["chrom\tposition"])
    caller = varscan.Varscan()

    valid_reader = caller._validate_filter_file(file_reader)

    self.assertEqual("p1.hc.fpfilter.pass", valid_reader.file_name)
def test_validate_filter_file_invalidFile(self):
    """A filter file with a ``chrom``/``pos``/``ref`` header is rejected.

    ``_validate_filter_file`` is expected to return ``None`` for this
    reader.
    """
    file_reader = MockFileReader("p1.hc.fpfilter.pass", ["chrom\tpos\tref"])
    caller = varscan.Varscan()

    valid_reader = caller._validate_filter_file(file_reader)

    self.assertIsNone(valid_reader)
def test_claim_vcfAnd6InvalidFilterFiles(self):
    """Claiming fails and abbreviates the list of six bad filter files.

    Six patients (A-F) each supply a VCF plus a file using the default
    high-confidence name but a ``chrom|pos|ref|alt`` header. ``claim`` is
    expected to raise ``utils.JQException`` reporting 6 files, with the
    listing ending in ``(1 file(s) omitted)``.
    """
    vcf_record = "chr1\t.\t.\t.\t.\t.\t.\t.\t."
    bad_hc_content = [self.entab("chrom|pos|ref|alt"), vcf_record]
    vcf_content = ["##foo", "##source=VarScan2", "#chrom", vcf_record]
    inputs = [
        MockFileReader("patientA.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientA.snp.vcf", vcf_content),
        MockFileReader("patientB.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientB.snp.vcf", vcf_content),
        MockFileReader("patientC.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientC.snp.vcf", vcf_content),
        MockFileReader("patientD.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientD.snp.vcf", vcf_content),
        MockFileReader("patientE.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientE.snp.vcf", vcf_content),
        MockFileReader("patientF.snp.Somatic.hc.fpfilter.pass", bad_hc_content),
        MockFileReader("patientF.snp.vcf", vcf_content),
    ]
    varscan_caller = varscan.Varscan()
    expected_error = r"The \[6\] input files \[.*\(1 file\(s\) omitted\)\] match high-confidence file names, but the file header is invalid or missing. Review inputs and try again."

    self.assertRaisesRegexp(utils.JQException,
                            expected_error,
                            varscan_caller.claim,
                            inputs)