def test_simple_seg_file_annotations(self):
    """Test that we can read in a seg file, do GENCODE annotation, and output as SIMPLE_TSV.

    Steps:
      1. Remove any stale output file left over from a previous run.
      2. Count the segments produced from the input seg file (27 expected).
      3. Re-read the segments, annotate each with the test GENCODE
         datasource, and render them with SimpleOutputRenderer.
      4. Read the rendered TSV back and check the required columns plus
         non-empty 'start'/'end' values and a 'genes' column on every row.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_annotations.tsv"
    seg_config = 'configs/seg_file_input.config'
    expected_num_segs = 27

    if os.path.exists(output_filename):
        os.remove(output_filename)

    # Count the segments.  Summing over the iterable (instead of relying on
    # a pre-seeded enumerate index) reports 0 -- not a bogus value -- when
    # the input yields no segments at all.
    ic = MafliteInputMutationCreator(inputFilename, None, seg_config)
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, expected_num_segs,
                     "Found %d segments when there should have been 27." % num_segs)

    # The counting pass above consumed the segments, so build a fresh
    # creator for the annotation pass.
    ic = MafliteInputMutationCreator(inputFilename, None, seg_config)
    segs = ic.createMutations()

    gencode_ds = TestUtils._create_test_gencode_v19_ds("out/seg_file_gencode_ds")
    # NOTE(review): this instance is never used below; retained only in case
    # constructing an Annotator has side effects -- confirm and remove.
    annotator = Annotator()

    segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(iter(segs_annotated))

    # Now check the output
    output_reader = GenericTsvReader(output_filename)

    required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
    headers = output_reader.getFieldNames()
    for rcol in required_cols:
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        self.assertIsNotNone(line_dict['start'])
        self.assertNotEqual(line_dict['start'].strip(), "")
        self.assertIsNotNone(line_dict['end'])
        self.assertNotEqual(line_dict['end'].strip(), "")
        self.assertIn("genes", line_dict)
def test_simple_seg_file_annotations(self):
    """Test that we can read in a seg file, do GENCODE annotation, and output as SIMPLE_TSV.

    Steps:
      1. Remove any stale output file left over from a previous run.
      2. Count the segments produced from the input seg file (27 expected).
      3. Re-read the segments, annotate each with the test GENCODE
         datasource, and render them with SimpleOutputRenderer.
      4. Read the rendered TSV back and check the required columns plus
         non-empty 'start'/'end' values and a 'genes' column on every row.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_annotations.tsv"
    seg_config = 'configs/seg_file_input.config'
    expected_num_segs = 27

    if os.path.exists(output_filename):
        os.remove(output_filename)

    # Count the segments.  Summing over the iterable (instead of relying on
    # a pre-seeded enumerate index) reports 0 -- not a bogus value -- when
    # the input yields no segments at all.
    ic = MafliteInputMutationCreator(inputFilename, None, seg_config)
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, expected_num_segs,
                     "Found %d segments when there should have been 27." % num_segs)

    # The counting pass above consumed the segments, so build a fresh
    # creator for the annotation pass.
    ic = MafliteInputMutationCreator(inputFilename, None, seg_config)
    segs = ic.createMutations()

    gencode_ds = TestUtils._create_test_gencode_v19_ds("out/seg_file_gencode_ds")
    # NOTE(review): this instance is never used below; retained only in case
    # constructing an Annotator has side effects -- confirm and remove.
    annotator = Annotator()

    segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(iter(segs_annotated))

    # Now check the output
    output_reader = GenericTsvReader(output_filename)

    required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
    headers = output_reader.getFieldNames()
    for rcol in required_cols:
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        self.assertIsNotNone(line_dict['start'])
        self.assertNotEqual(line_dict['start'].strip(), "")
        self.assertIsNotNone(line_dict['end'])
        self.assertNotEqual(line_dict['end'].strip(), "")
        self.assertIn("genes", line_dict)