def testHeaderCreation(self): """Test that a tcga vcf header can be generated, even from a blank mutation. """ vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt") m = MutationData() m.createAnnotation('center', "broad.mit.edu") hdr = vcfOR.createVcfHeader(m) self.assertTrue(hdr is not None) self.assertTrue(hdr <> "") self.assertTrue(hdr.find("broad.mit.edu") <> -1, "Could not find string that should have been in header.")
def testHeaderCreation(self): """Test that a tcga vcf header can be generated, even from a blank mutation. """ vcfOR = TcgaVcfOutputRenderer("out/TCGAVCFHeader.out.txt") m = MutationData() m.createAnnotation('center', "broad.mit.edu") hdr = vcfOR.createVcfHeader(m) self.assertTrue(hdr is not None) self.assertTrue(hdr <> "") self.assertTrue(hdr.find("broad.mit.edu") <> -1, "Could not find string that should have been in header.")
def testChromRendering(self): """Make sure that the chromosome rendering in TCGA VCF is correct: "1" --> "1", "GLXXXX.Y" --> GLXXXX.Y, not <GLXXXX.Y>""" vcfOR = TcgaVcfOutputRenderer("out/TCGAVCF.empty.out.txt") testChrs = ["21", "MT", "GL1234.4", "1"] gt = ["21", "MT", "GL1234.4", "1"] ctr = 0 for t in testChrs: val = vcfOR._renderChrom(t) self.assertTrue(val == gt[ctr], "Chrom value did not match ground truth: " + t + " --> " + val + " GT: " + gt[ctr]) ctr += 1
def testChromRendering(self): """Make sure that the chromosome rendering in TCGA VCF is correct: "1" --> "1" , "GLXXXX.Y" --> <GLXXXX.Y>""" vcfOR = TcgaVcfOutputRenderer("out/TCGAVCF.empty.out.txt") testChrs = ["21", "MT", "GL1234.4", "1"] gt = ["21", "MT", "<GL1234.4>", "1"] ctr = 0 for t in testChrs: val = vcfOR._renderChrom(t) self.assertTrue(val == gt[ctr], "Chrom value did not match ground truth: " + t + " --> " + val + " GT: " + gt[ctr]) ctr += 1
def testFullIndelVcf(self): """ Perform test of a Indel maflite all the way through TCGA VCF creation """ outputFilename = "out/TCGAVCFTest.indel.vcf" callStatsIn = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt") vcfOR = TcgaVcfOutputRenderer(outputFilename) datasources = self._createDatasourcesForTesting() annotator = Annotator() annotator.setInputCreator(callStatsIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) for ds in datasources: annotator.addDatasource(ds) annotator.annotate() self.assertTrue(os.path.exists(outputFilename)) # Check that the deletions have position decremented by one from what is present in the maflite # Checking that 1 36643701 in the maflite (a deletion) becomes 1 36643700 in the vcf, but that the others are # the same. maflite_ic = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt") muts = maflite_ic.createMutations() vcf_reader = vcf.Reader(open(outputFilename, 'r')) vcf_pos = [int(rec.POS) for rec in vcf_reader] for m in muts: # If the variant is a deletion, then the vcf position should be the same as maflite minus one. Otherwise, the same. is_variant_deletion = (m.alt_allele == "") or (m.alt_allele == "-") or (m.alt_allele == ".") if is_variant_deletion: self.assertTrue((int(m.start) - 1) in vcf_pos, "Deletion was not correct for " + m.chr + ":" + m.start) else: self.assertTrue(int(m.start) in vcf_pos, "Insertion was not correct for " + m.chr + ":" + m.start)
def testFullSnpVcf(self): """ Perform test of a SNP call stats (maflite) all the way through TCGA VCF creation. Only checks that a file was created. """ outputFilename = "out/TCGAVCFTest.snp.vcf" callStatsIn = MafliteInputMutationCreator( "testdata/Test.call_stats.trim.txt") vcfOR = TcgaVcfOutputRenderer(outputFilename) datasources = self._createDatasourcesForTesting() annotator = Annotator() annotator.setInputCreator(callStatsIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) for ds in datasources: annotator.addDatasource(ds) annotator.annotate() self.assertTrue(os.path.exists(outputFilename)) maflite_ic = MafliteInputMutationCreator( "testdata/maflite/Patient0.indel.maf.txt") muts = maflite_ic.createMutations() vcf_reader = vcf.Reader(open(outputFilename, 'r')) for i, m in enumerate(muts): rec = vcf_reader.next() qual = rec.QUAL # All records should have QUAL with a value (i.e. NOT ".") self.assertIsNotNone(qual)
def _testInfoField(self, filter): outputFilename = "out/TCGAVCFTest.indel.vcf.dummy" vcfOR = TcgaVcfOutputRenderer(outputFilename) mq0 = "0" ss = "Somatic" m = MutationData() m.createAnnotation('t_ref_count', '20') m.createAnnotation('t_alt_count', '25') m.createAnnotation('n_ref_count', '100') m.createAnnotation('n_alt_count', '150') m.createAnnotation('dbSNP_RS', '') m.createAnnotation('gene', 'FAKE') m.createAnnotation('variant_type', 'SNP') m.createAnnotation('variant_classification', 'Missense') m.createAnnotation('transcript_id', 'tid001') infoData = vcfOR._generateInfoField(m, filter, mq0, ss) return infoData
def _testInfoField(self, filter): outputFilename = "out/TCGAVCFTest.indel.vcf.dummy" vcfOR = TcgaVcfOutputRenderer(outputFilename) mq0 = "0" ss = "Somatic" m = MutationData() m.createAnnotation('t_ref_count', '20') m.createAnnotation('t_alt_count', '25') m.createAnnotation('n_ref_count', '100') m.createAnnotation('n_alt_count', '150') m.createAnnotation('dbSNP_RS', '') m.createAnnotation('gene', 'FAKE') m.createAnnotation('variant_type', 'SNP') m.createAnnotation('variant_classification', 'Missense') m.createAnnotation('transcript_id', 'tid001') infoData = vcfOR._generateInfoField(m, filter, mq0, ss) return infoData
def testMafInput(self): """Make sure that we can render a TCGA VCF from a TCGA MAF -- using no datasources""" inputFile = "testdata/maf/Patient1.snp.maf.annotated" outputFilename = "out/maf2tcgavcf.vcf" mafIn = MafliteInputMutationCreator(inputFile) vcfOR = TcgaVcfOutputRenderer(outputFilename) annotator = Annotator() annotator.setInputCreator(mafIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) annotator.annotate() self.assertTrue(os.path.exists(outputFilename)) statinfo = os.stat(outputFilename) self.assertTrue(statinfo.st_size > 0, "Generated VCF file (" + outputFilename + ") is empty.")
def testFullSnpVcf(self): """ Perform test of a SNP call stats (maflite) all the way through TCGA VCF creation. Only checks that a file was created. """ outputFilename = "out/TCGAVCFTest.snp.vcf" callStatsIn = MafliteInputMutationCreator("testdata/Test.call_stats.trim.txt") vcfOR = TcgaVcfOutputRenderer(outputFilename) datasources = self._createDatasourcesForTesting() annotator = Annotator() annotator.setInputCreator(callStatsIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) for ds in datasources: annotator.addDatasource(ds) annotator.annotate() self.assertTrue(os.path.exists(outputFilename))
def testEmptyInput(self): """Make sure that we can generate an empty vcf from an empty maflite""" inputFile = "testdata/maflite/empty.maflite" outputFilename = "out/empty.vcf" callStatsIn = MafliteInputMutationCreator(inputFile) vcfOR = TcgaVcfOutputRenderer(outputFilename) datasources = self._createDatasourcesForTesting() annotator = Annotator() annotator.setInputCreator(callStatsIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) for ds in datasources: annotator.addDatasource(ds) annotator.annotate() self.assertTrue(os.path.exists(outputFilename)) statinfo = os.stat(outputFilename) self.assertTrue(statinfo.st_size > 0, "Generated VCF file (" + outputFilename + ") is empty.")
def testAnotherFullSNP(self): """Test SNP call stats . Just make sure no exception is thrown.""" inputFile = "testdata/maflite/Another.call_stats.txt" outputFilename = "out/Another.call_stats.out.vcf" callStatsIn = MafliteInputMutationCreator(inputFile) vcfOR = TcgaVcfOutputRenderer(outputFilename) datasources = self._createDatasourcesForTesting() annotator = Annotator() annotator.setInputCreator(callStatsIn) annotator.setOutputRenderer(vcfOR) annotator.setManualAnnotations(self._createManualAnnotations()) for ds in datasources: annotator.addDatasource(ds) annotator.annotate() self.assertTrue(os.path.exists(outputFilename)) statinfo = os.stat(outputFilename) self.assertTrue(statinfo.st_size > 0, "Generated VCF file (" + outputFilename + ") is empty.")
def testPopulatedButNullValuesInInitNLod(self): """Test that if init_n_lod is "." or "", there is no error """ m = MutationData() m.createAnnotation("init_n_lod", "") outputFilename = "out/blank.vcf" vcfOR = TcgaVcfOutputRenderer(outputFilename) lod = vcfOR._extract_lod(m,"init_n_lod") self.assertEqual(lod, 50) m["init_n_lod"] = '.' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 50) m["init_n_lod"] = '6' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 6) m["init_n_lod"] = '6.8' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 6) m["init_n_lod"] = '-12.8' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, -12) m.createAnnotation("t_lod_fstar", "") lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 50) m["t_lod_fstar"] = '.' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 50) m["t_lod_fstar"] = '6' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 6) m["t_lod_fstar"] = '6.8' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 6) m["t_lod_fstar"] = '-12.8' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, -12)
def testPopulatedButNullValuesInInitNLod(self): """Test that if init_n_lod is "." or "", there is no error """ m = MutationData() m.createAnnotation("init_n_lod", "") outputFilename = "out/blank.vcf" vcfOR = TcgaVcfOutputRenderer(outputFilename) lod = vcfOR._extract_lod(m,"init_n_lod") self.assertEqual(lod, 50) m["init_n_lod"] = '.' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 50) m["init_n_lod"] = '6' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 6) m["init_n_lod"] = '6.8' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, 6) m["init_n_lod"] = '-12.8' lod = vcfOR._extract_lod(m, "init_n_lod") self.assertEqual(lod, -12) m.createAnnotation("t_lod_fstar", "") lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 50) m["t_lod_fstar"] = '.' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 50) m["t_lod_fstar"] = '6' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 6) m["t_lod_fstar"] = '6.8' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, 6) m["t_lod_fstar"] = '-12.8' lod = vcfOR._extract_lod(m, "t_lod_fstar") self.assertEqual(lod, -12)