def test_simple_seg_file_input(self):
    """Test that we can read in a seg file, do no annotation, and output as SIMPLE_TSV.

    The seg file is read twice: once to count the segments produced by the
    input creator and once to render them with SimpleOutputRenderer.  The
    rendered TSV is then read back to check that the required columns are
    present and that every row has non-blank 'start' and 'end' values.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_input.tsv"

    # Drop any output left over from a previous run.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
    # Count by exhausting the iterator so that an empty input is reported
    # as 0 segments rather than a value derived from a stale loop index.
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, 27,
                     "Found %d segments when there should have been 27." % num_segs)

    # The counting pass consumed the segments; create them again for rendering.
    ic = MafliteInputMutationCreator(inputFilename, 'configs/seg_file_input.config')
    segs = ic.createMutations()

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(segs)

    # Now check the output
    output_reader = GenericTsvReader(output_filename)

    required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
    headers = output_reader.getFieldNames()
    for rcol in required_cols:
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        self.assertIsNotNone(line_dict['start'])
        self.assertNotEqual(line_dict['start'].strip(), "")
        self.assertIsNotNone(line_dict['end'])
        self.assertNotEqual(line_dict['end'].strip(), "")
def testSampleNameSelectorWithMaf(self):
    """ Check the sample name SampleNameSelector reports for each mutation of a tiny maflite.

    The selector is built from the first mutation only; a second
    createMutations() call supplies the mutations that are checked.
    """
    mut_creator = MafliteInputMutationCreator("testdata/maflite/tiny_maflite.maf.txt")
    selector = SampleNameSelector(next(mut_creator.createMutations()))

    expected_sample_name = "Patient0-Normal-Patient0-Tumor"
    for mut in mut_creator.createMutations():
        self.assertEqual(expected_sample_name, selector.getSampleName(mut))

    self.assertEqual(selector.getAnnotationSource(), "OUTPUT")
    self.assertEqual(selector.getOutputAnnotationName(), MutUtils.SAMPLE_NAME_ANNOTATION_NAME)
def testSampleNameSelectorWithMaf(self):
    """ Verify SampleNameSelector against testdata/maflite/tiny_maflite.maf.txt.

    Every mutation must map to the combined normal/tumor sample name, and
    the selector must report "OUTPUT" as its annotation source and
    MutUtils.SAMPLE_NAME_ANNOTATION_NAME as its output annotation name.
    """
    maflite_path = "testdata/maflite/tiny_maflite.maf.txt"
    creator = MafliteInputMutationCreator(maflite_path)

    # Seed the selector with the first mutation produced by the creator.
    seed_mut = next(creator.createMutations())
    name_selector = SampleNameSelector(seed_mut)

    all_muts = creator.createMutations()
    for current_mut in all_muts:
        observed = name_selector.getSampleName(current_mut)
        self.assertEqual("Patient0-Normal-Patient0-Tumor", observed)

    self.assertEqual(name_selector.getAnnotationSource(), "OUTPUT")
    self.assertEqual(name_selector.getOutputAnnotationName(),
                     MutUtils.SAMPLE_NAME_ANNOTATION_NAME)
def testFullIndelVcf(self):
    """ Perform test of a Indel maflite all the way through TCGA VCF creation.

    After annotating and rendering, the VCF is read back and each maflite
    mutation is checked against the set of VCF positions: deletions are
    expected one base earlier than in the maflite, everything else at the
    same position.
    """
    maflite_filename = "testdata/maflite/Patient0.indel.maf.txt"
    outputFilename = "out/TCGAVCFTest.indel.vcf"
    callStatsIn = MafliteInputMutationCreator(maflite_filename)
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # Check that the deletions have position decremented by one from what is present in the maflite
    #  Checking that 1 36643701 in the maflite (a deletion) becomes 1 36643700 in the vcf, but that the others are
    #  the same.
    maflite_ic = MafliteInputMutationCreator(maflite_filename)
    muts = maflite_ic.createMutations()

    # Read every position up front inside a context manager so the VCF
    # handle is closed even if an assertion below fails.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_pos = set(int(rec.POS) for rec in vcf.Reader(vcf_fp))

    for m in muts:
        # If the variant is a deletion, then the vcf position should be the same as maflite minus one.  Otherwise, the same.
        is_variant_deletion = m.alt_allele in ("", "-", ".")
        if is_variant_deletion:
            self.assertIn(int(m.start) - 1, vcf_pos,
                          "Deletion was not correct for " + m.chr + ":" + m.start)
        else:
            self.assertIn(int(m.start), vcf_pos,
                          "Insertion was not correct for " + m.chr + ":" + m.start)
def testFullSnpVcf(self):
    """ Perform test of a SNP call stats (maflite) all the way through TCGA VCF creation.

    Checks that the output file was created and that the VCF records read
    back from it carry a QUAL value.
    """
    outputFilename = "out/TCGAVCFTest.snp.vcf"
    callStatsIn = MafliteInputMutationCreator(
        "testdata/Test.call_stats.trim.txt")
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # NOTE(review): the number of VCF records inspected is driven by the
    # indel maflite, not by the call stats file rendered above -- confirm
    # this is intended.
    maflite_ic = MafliteInputMutationCreator(
        "testdata/maflite/Patient0.indel.maf.txt")
    muts = maflite_ic.createMutations()

    # The context manager guarantees the VCF handle is closed, even when an
    # assertion fails part-way through.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_reader = vcf.Reader(vcf_fp)
        for _ in muts:
            # next() works with both the Python 2 and Python 3 iterator protocols.
            rec = next(vcf_reader)

            # All records should have QUAL with a value (i.e. NOT ".")
            self.assertIsNotNone(rec.QUAL)
def test_simple_seg_file_annotations(self):
    """Test that we can read in a seg file, do GENCODE annotation, and output as SIMPLE_TSV.

    The seg file is read once to count segments and once more to annotate
    each segment with the test GENCODE datasource.  The annotated segments
    are rendered with SimpleOutputRenderer and the TSV is read back to
    check the required columns, non-blank 'start'/'end' values and the
    presence of a 'genes' field on every row.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_annotations.tsv"

    # Drop any output left over from a previous run.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    ic = MafliteInputMutationCreator(inputFilename, None, 'configs/seg_file_input.config')
    # Count by exhausting the iterator so that an empty input is reported
    # as 0 segments rather than a value derived from a stale loop index.
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, 27,
                     "Found %d segments when there should have been 27." % num_segs)

    # The counting pass consumed the segments; create them again for annotation.
    ic = MafliteInputMutationCreator(inputFilename, None, 'configs/seg_file_input.config')
    segs = ic.createMutations()

    gencode_ds = TestUtils._create_test_gencode_v19_ds("out/seg_file_gencode_ds")
    # NOTE(review): this Annotator is never used below; kept in case its
    # construction has side effects -- confirm and remove.
    annotator = Annotator()

    segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(iter(segs_annotated))

    # Now check the output
    output_reader = GenericTsvReader(output_filename)

    required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
    headers = output_reader.getFieldNames()
    for rcol in required_cols:
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        self.assertIsNotNone(line_dict['start'])
        self.assertNotEqual(line_dict['start'].strip(), "")
        self.assertIsNotNone(line_dict['end'])
        self.assertNotEqual(line_dict['end'].strip(), "")
        self.assertIn("genes", line_dict)
def test_alt1_vs_alt2(self):
    """Check that the alternate allele differing from the reference is chosen when two are given.

    Every mutation in the test maflite must end up with alt_allele "C";
    the failure message carries the 1-based position of the mutation.
    """
    creator = MafliteInputMutationCreator("testdata/maflite/alt1_vs_alt2.maflite")
    for line_num, mut in enumerate(creator.createMutations(), 1):
        failure_msg = ("Did not properly populate the alternate allele in line "
                       + str(line_num) + "  " + mut.alt_allele)
        self.assertTrue(mut.alt_allele == "C", failure_msg)
def testChromosomeM(self):
    """ Every mutation read from the chrM maflite must report chromosome "M" (not "MT")."""
    creator = MafliteInputMutationCreator("testdata/maflite/chrM.maf.txt")
    for mut in creator.createMutations():
        failure_msg = "mitochondria chromosome should be M, not " + mut.chr
        self.assertTrue(mut.chr == "M", failure_msg)
def testSimpleRead(self):
    """ Read a known-good maflite file and validate every mutation it yields.

    There is no explicit assertion: the test passes as long as
    MutUtils.validateMutation does not raise.
    """
    maflite_path = "testdata/maflite/Patient0.indel.maf.txt"
    creator = MafliteInputMutationCreator(maflite_path, 'configs/maflite_input.config')

    for mut in creator.createMutations():
        MutUtils.validateMutation(mut)
def testTCGAMAFAsInput(self):
    """ Feed a TCGA MAF through the MAFLITE input creator and render it without annotating.

    The test has no assertions; it passes when rendering completes
    without raising.
    """
    maf_creator = MafliteInputMutationCreator("testdata/maf/Patient0.maf.annotated",
                                              'configs/maflite_input.config')
    mutations = maf_creator.createMutations()

    renderer = TcgaMafOutputRenderer("out/testTCGAMAFAsInput.tsv",
                                     'configs/tcgaMAF2.4_output.config')
    comments = maf_creator.getComments()
    renderer.renderMutations(mutations, comments)
def testNoUnknownAnnotations(self):
    """ The transcript datasource must not leave any annotation with an Unknown source.

    Each mutation is annotated, validated, and then checked with
    MutUtils.getUnknownAnnotations, which must return nothing.
    """
    creator = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
    transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)

    for raw_mut in creator.createMutations():
        annotated = transcript_ds.annotate_mutation(raw_mut)
        MutUtils.validateMutation(annotated)

        unknown = MutUtils.getUnknownAnnotations(annotated)
        failure_msg = "Unknown annotations exist in mutation: " + str(unknown)
        self.assertTrue(len(unknown) == 0, failure_msg)
def testNumberOfMuts(self):
    """ Make sure that the proper number of mutations were generated.

    The expected count is the number of lines in the input file minus one
    (presumably the header row -- TODO confirm the file has no other
    non-data lines).
    """
    inputFilename = "testdata/maflite/Patient0.snp.maf.txt"
    tmp = MafliteInputMutationCreator(inputFilename, 'configs/maflite_input.config')
    muts = tmp.createMutations()

    # open() inside a context manager closes the handle deterministically.
    with open(inputFilename, 'r') as input_fp:
        numMutsInput = len(input_fp.readlines()) - 1

    ctr = sum(1 for _ in muts)
    self.assertEqual(ctr, numMutsInput, "Did not see the proper number of mutations.")
def testSimpleRead(self):
    """ Validate each mutation produced from a good maflite file.

    Passing means MutUtils.validateMutation never raised; nothing else is
    asserted.
    """
    creator = MafliteInputMutationCreator(
        "testdata/maflite/Patient0.indel.maf.txt",
        None,
        'configs/maflite_input.config')
    mutation_iter = creator.createMutations()

    for current in mutation_iter:
        MutUtils.validateMutation(current)
def testNumberOfMuts(self):
    """ Make sure that the proper number of mutations were generated.

    The expected count is the number of lines in the input file minus one
    (presumably the header row -- TODO confirm the file has no other
    non-data lines).
    """
    inputFilename = "testdata/maflite/Patient0.snp.maf.txt"
    tmp = MafliteInputMutationCreator(inputFilename)
    muts = tmp.createMutations()

    # open() inside a context manager closes the handle deterministically.
    with open(inputFilename, 'r') as input_fp:
        numMutsInput = len(input_fp.readlines()) - 1

    ctr = sum(1 for _ in muts)
    self.assertEqual(ctr, numMutsInput,
                     "Did not see the proper number of mutations.")
def testTCGAMAFAsInput(self):
    """ Render a TCGA MAF, read via the MAFLITE input creator, with no annotation step.

    No assertions are made; the test passes if rendering finishes without
    an exception.
    """
    input_path = "testdata/maf/Patient0.maf.annotated"
    output_path = "out/testTCGAMAFAsInput.tsv"

    maf_input = MafliteInputMutationCreator(input_path, None, 'configs/maflite_input.config')
    mutation_iter = maf_input.createMutations()

    maf_renderer = TcgaMafOutputRenderer(output_path, 'configs/tcgaMAF2.4_output.config')
    maf_renderer.renderMutations(mutation_iter, maf_input.getComments())
def test_alt1_vs_alt2(self):
    """When two alternates are specified, the one that differs from the reference must be used.

    All mutations in the fixture are expected to have alt_allele "C".
    """
    fixture = "testdata/maflite/alt1_vs_alt2.maflite"
    mutation_iter = MafliteInputMutationCreator(fixture).createMutations()

    # Positions are 1-based so the message lines up with the mutation order.
    for position, mutation in enumerate(mutation_iter, 1):
        observed_alt = mutation.alt_allele
        self.assertTrue(
            observed_alt == "C",
            "Did not properly populate the alternate allele in line " +
            str(position) + "  " + observed_alt)
def testChrGLs(self):
    """ Test that mutations on unaligned transcripts can be annotated properly.  I.e. when chromosome = GL.....

    Each mutation is annotated and validated; any exception raised by
    either step fails the test with the exception text and traceback.
    The annotated mutation must then have a non-empty 'gene' value.
    """
    inputCreator = MafliteInputMutationCreator('testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)
    mutations = inputCreator.createMutations()
    for m in mutations:
        try:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
        except Exception as e:
            # Fail this test because an exception was thrown.  self.fail()
            # states the intent directly instead of asserting on a constant.
            self.fail("Erroneous exception was thrown: " + str(e) + "\n" + traceback.format_exc())
        self.assertNotEqual(m['gene'], '')
def testNoLostMutations(self):
    """ Does a simple gaf datasource annotation run and makes sure that no mutations were lost.

    The expected count is the number of lines in the input file minus one
    (presumably the header row -- TODO confirm the file has no other
    non-data lines).
    """
    inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
    inputCreator = MafliteInputMutationCreator(inputFilename, "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(self.config)

    # open() inside a context manager closes the handle deterministically.
    with open(inputFilename, 'r') as input_fp:
        numMutsInput = len(input_fp.readlines()) - 1

    mutations = inputCreator.createMutations()
    ctr = 0
    for m in mutations:
        m = gafDatasource.annotate_mutation(m)
        MutUtils.validateMutation(m)
        ctr += 1
    self.assertEqual(ctr, numMutsInput, "Gaf data source altered mutation count.")
def testNoUnknownAnnotations(self):
    """ No annotation added by the transcript datasource may have its source set to Unknown.

    For every mutation: annotate, validate, then require that
    MutUtils.getUnknownAnnotations returns an empty collection.
    """
    snp_maflite = 'testdata/maflite/Patient0.snp.maf.txt'
    mutation_source = MafliteInputMutationCreator(snp_maflite)
    datasource = TestUtils.createTranscriptProviderDatasource(self.config)

    mutation_iter = mutation_source.createMutations()
    for mutation in mutation_iter:
        mutation = datasource.annotate_mutation(mutation)
        MutUtils.validateMutation(mutation)

        leftovers = MutUtils.getUnknownAnnotations(mutation)
        num_leftovers = len(leftovers)
        self.assertTrue(
            num_leftovers == 0,
            "Unknown annotations exist in mutation: " + str(leftovers))
def testMulticoreAnnotateFromChunkedFile(self):
    """ Annotate a maflite in fixed-size chunks through a process pool and check the rendered row count.

    Mutations are pulled from the input creator in rounds of up to
    numChunks chunks of up to chunkSize mutations each.  Every round is
    mapped over the pool with annotate_mutations_global, flattened, and
    collected.  The collected results are rendered with
    SimpleOutputRenderer and the output TSV must contain 730 rows.
    """
    #TODO: Add unit test that Mutation data is pickle-able
    inputFile = "testdata/maflite/Patient0.snp.maf.txt"
    outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
    chunkSize = 200
    numChunks = 4

    gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
    ic = MafliteInputMutationCreator(inputFile)
    oc = SimpleOutputRenderer(outputFile)

    # createChunks
    muts = ic.createMutations()

    allAnnotatedChunksFlat = []
    are_mutations_remaining = True
    # One worker per chunk in a round.
    p = LoggingPool(processes=numChunks)
    while are_mutations_remaining:
        chunks = []
        for j in xrange(0, numChunks):
            chunk = []
            for i in xrange(0, chunkSize):
                try:
                    chunk.append(muts.next())
                except StopIteration:
                    # Input exhausted: this is the last round.  The (possibly
                    # short or empty) chunk is still appended below, and the
                    # remaining chunks of this round are presumably empty --
                    # assumes muts keeps raising StopIteration once exhausted.
                    are_mutations_remaining = False
                    break
            # Each work item pairs a chunk with the datasource to apply to it.
            chunks.append((chunk, gafDatasource))
        annotatedChunks = p.map(annotate_mutations_global, chunks)
        annotatedChunksFlat = self._flattenChunks(annotatedChunks)
        allAnnotatedChunksFlat.append(annotatedChunksFlat)
    # NOTE(review): the pool is only closed on the success path; an exception
    # inside the loop skips close()/join() -- confirm whether that matters here.
    p.close()
    p.join()

    # Chain the per-round results into a single iterable for the renderer.
    annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)

    ctr = 0
    oc.renderMutations(annotatedMuts, Metadata())
    # Count the rows that were actually written to the output file.
    tsvReader = GenericTsvReader(outputFile)
    for line in tsvReader:
        ctr += 1
    self.assertTrue(ctr == 730, "Should have read 730 variants, but read " + str(ctr))
def testNoLostMutations(self):
    """ Does a simple gaf datasource annotation run and makes sure that no mutations were lost.

    The expected count is the number of lines in the input file minus one
    (presumably the header row -- TODO confirm the file has no other
    non-data lines).
    """
    inputFilename = 'testdata/maflite/Patient0.snp.maf.txt'
    inputCreator = MafliteInputMutationCreator(
        inputFilename, "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(
        self.config)

    # open() inside a context manager closes the handle deterministically.
    with open(inputFilename, 'r') as input_fp:
        numMutsInput = len(input_fp.readlines()) - 1

    mutations = inputCreator.createMutations()
    ctr = 0
    for m in mutations:
        m = gafDatasource.annotate_mutation(m)
        MutUtils.validateMutation(m)
        ctr += 1
    self.assertEqual(ctr, numMutsInput,
                     "Gaf data source altered mutation count.")
def testChrGLs(self):
    """ Test that mutations on unaligned transcripts can be annotated properly.  I.e. when chromosome = GL.....

    Each mutation is annotated and validated; any exception raised by
    either step fails the test with the exception text and traceback.
    The annotated mutation must then have a non-empty 'gene' value.
    """
    inputCreator = MafliteInputMutationCreator(
        'testdata/maflite/chrGLs.maf.tsv', "configs/maflite_input.config")
    gafDatasource = TestUtils.createTranscriptProviderDatasource(
        self.config)
    mutations = inputCreator.createMutations()
    for m in mutations:
        try:
            m = gafDatasource.annotate_mutation(m)
            MutUtils.validateMutation(m)
        except Exception as e:
            # Fail this test because an exception was thrown.  self.fail()
            # states the intent directly instead of asserting on a constant.
            self.fail(
                "Erroneous exception was thrown: " + str(e) + "\n" +
                traceback.format_exc())
        self.assertNotEqual(m['gene'], '')
def testChromosomeM(self):
    """ Mutations from the chrM maflite must carry chromosome "M", never "MT"."""
    chrm_input = MafliteInputMutationCreator("testdata/maflite/chrM.maf.txt",
                                             'configs/maflite_input.config')
    mutation_iter = chrm_input.createMutations()
    for mutation in mutation_iter:
        is_chr_m = (mutation.chr == "M")
        self.assertTrue(is_chr_m, "mitochondria chromosome should be M, not " + mutation.chr)