def setUp(self):
    """Prepare an in-memory ribo file containing the "merzifon" experiment.

    The text fixtures are wrapped in ``StringIO`` objects, an HDF5 file
    backed by ``BytesIO`` is initialized with the reference lengths and
    the annotation, and one experiment is created from ``READ_SET_1``
    with coverage storage enabled.
    """
    # Text fixtures exposed as file-like objects / line lists.
    self.ref_length_file = StringIO(TRANSCRIPT_LENGTHS)
    self.annotation_file = StringIO(TRANSCRIPT_ANNOTATION)
    self.annotation_lines = TRANSCRIPT_ANNOTATION.split("\n")
    self.alignment_file = StringIO(READ_SET_1)
    self.metadata_file = StringIO(METADATA_EXPERIMENT_STR_1)

    # The HDF5 file lives purely in memory; nothing touches the disk.
    self.handle = h5py.File(BytesIO(), "w")
    create.initialize(self.handle, "merzifon", "hg38")
    create.set_reference_names_and_lengths(self.handle,
                                           self.ref_length_file)
    create.set_annotation(self.handle, self.annotation_lines)

    # Read back what was just stored so that the experiment is built
    # from the same reference data the file holds.
    names = get_reference_names(self.handle)
    lengths = get_reference_lengths(self.handle)
    boundaries = get_region_boundaries(self.handle)
    experiment_group = self.handle[EXPERIMENTS_name]["merzifon"]

    experiment_options = dict(
        ribo_exp_handle=experiment_group,
        experiment_name="merzifon",
        alignment_file_handle=self.alignment_file,
        ref_names=names,
        ref_lengths=lengths,
        region_coordinates=boundaries,
        metagene_radius=METAGENE_RADIUS,
        left_span=LEFT_SPAN,
        right_span=RIGHT_SPAN,
        length_min=2,
        length_max=5,
        metadata=self.metadata_file,
        store_coverage=True,
        nprocess=4,
    )
    create_experiment.create_experiment(**experiment_options)
def test_get_reference_names(self):
    """Spot-check cells of the annotation array written by ``set_annotation``.

    Despite the method name, the assertions read the dataset stored
    under ``REF_ANNOTATION_NAME`` and compare individual cells against
    hard-coded values.
    """
    create.initialize(self.handle, "merzifon",
                      reference_name="appris_human_v2")
    create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
    create.set_annotation(h5_handle=self.handle,
                          annotation_lines=self.annotation_lines)

    annotation = self.handle[REFERENCE_name][REF_ANNOTATION_NAME][...]

    # ((row, column), expected value) for each cell that is checked.
    expected_cells = (
        ((0, 0), 50),
        ((1, 0), 0),
        ((1, 2), 875),
        ((2, 1), 1251),
        ((3, 2), 565),
    )
    for (row, column), expected in expected_cells:
        self.assertEqual(annotation[row, column], expected)
def test_set_reference_names_and_lengths(self):
    """Verify the stored reference names and reference lengths.

    After ``set_reference_names_and_lengths`` runs, the names dataset
    must hold four entries (with "GAPDH" in the first and "BRCA" in the
    fourth) and the lengths dataset must hold the expected values.
    """
    create.initialize(self.handle, "merzifon",
                      reference_name="appris_human_v2")
    create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
    reference_group = self.handle[REFERENCE_name]

    # Names are converted to ``str`` so substring checks work on them.
    names = reference_group[REF_DG_REFERENCE_NAMES][...].astype(str)
    for position, fragment in ((0, "GAPDH"), (3, "BRCA")):
        self.assertIn(fragment, names[position])
    self.assertEqual(4, len(names))

    lengths = reference_group[REF_DG_REFERENCE_LENGTHS][...]
    for position, expected_length in ((1, 875), (2, 1462)):
        self.assertEqual(expected_length, lengths[position])
    # Negative control: the first entry must not carry the third's length.
    self.assertNotEqual(1462, lengths[0])
def test_set_coverage_vectors(self):
    """Check the start/stop site coverage vectors from ``set_coverage_vectors``.

    The file is initialized with reference lengths and annotation, then
    ``set_coverage_vectors`` is called with ``5`` as its second argument.
    The datasets stored under ``REF_DG_START_SITE_COV`` and
    ``REF_DG_STOP_SITE_COV`` are compared element-wise against the
    expected 11-entry vectors.
    """
    create.initialize(self.handle, "merzifon",
                      reference_name="appris_human_v2")
    create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
    create.set_annotation(h5_handle=self.handle,
                          annotation_lines=self.annotation_lines)
    create.set_coverage_vectors(self.handle, 5)

    reference_group = self.handle[REFERENCE_name]
    start_site_cov = list(reference_group[REF_DG_START_SITE_COV][...])
    stop_site_cov = list(reference_group[REF_DG_STOP_SITE_COV][...])

    self.assertListEqual(start_site_cov,
                         [2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4])
    self.assertListEqual(stop_site_cov,
                         [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3])
    # Negative control. assertNotEqual (rather than assertTrue(a != b))
    # so that a failure reports both lists instead of "False is not true".
    self.assertNotEqual(stop_site_cov,
                        [6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3])
def test_start_site_coverage_from_ribo(self):
    """Compare computed start-site coverage with the expected fixture.

    Coverage is computed from ``READ_SET_1`` with ``find_coverage``,
    reduced to start-site coverage with ``find_site_coverage`` (radius
    2), and must match ``ACTUAL_START_SITE_COVERAGE`` in every element.
    """
    create.initialize(self.handle, "merzifon",
                      reference_name="appris_human_v2")
    create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
    create.set_annotation(
        h5_handle=self.handle,
        annotation_lines=TRANSCRIPT_ANNOTATION.split("\n"),
    )

    # Read back the reference data stored in the file.
    region_boundaries = get_region_boundaries(self.handle)
    names = get_reference_names(self.handle)
    lengths = get_reference_lengths(self.handle)

    alignments = StringIO(READ_SET_1)
    coverage = find_coverage(alignments, names, lengths)
    observed = find_site_coverage(
        coverage=coverage,
        radius=2,
        annotation=region_boundaries,
        site_type="start",
    )

    matches = ACTUAL_START_SITE_COVERAGE == observed
    self.assertTrue(np.all(matches))
def test_init(self):
    """Check that ``initialize`` creates the experiment and reference entries.

    After initialization, "merzifon" must be a key under
    ``EXPERIMENTS_name`` and ``REFERENCE_name`` must be a top-level key.
    """
    create.initialize(self.handle, "merzifon",
                      reference_name="appris_human_v2")

    experiment_keys = self.handle[EXPERIMENTS_name].keys()
    self.assertIn("merzifon", experiment_keys)

    top_level_keys = self.handle.keys()
    self.assertIn(REFERENCE_name, top_level_keys)
def setUp(self):
    """Create a ribo file in memory and cache its per-region read counts.

    A ribo file for "experiment-1" is written into ``self.handle_io``
    with ``create.create_ribo``, closed, and reopened as ``Ribo``.  The
    region counts (not summed over references) for CDS, both UTRs and
    both UTR junctions are stored as attributes for the tests.
    """
    self.tmp_files = []

    # Text fixtures exposed as file-like objects.
    self.ref_len_file = StringIO(GENERIC_TRANSCRIPT_LENGTHS)
    self.annotation_file = StringIO(GENERIC_TRANSCRIPT_ANNOTATION)
    self.alignment_file_1 = StringIO(READ_SET_1)
    self.alignment_file_2 = StringIO(READ_SET_2)

    # In-memory HDF5 targets; ``handle_io`` is kept so that the finished
    # file can be reopened through ``Ribo`` below.
    self.handle_io = BytesIO()
    self.handle = h5py.File(self.handle_io, "w")
    self.handle_2 = h5py.File(BytesIO(), "w")
    self.h5_handle = h5py.File(BytesIO(), "w")

    initialize(self.h5_handle,
               experiment_name="dummy",
               reference_name="test_ref")
    _names, _lengths = set_reference_names_and_lengths(self.h5_handle,
                                                       self.ref_len_file)
    # The lengths file is handed to create_ribo as well, so rewind it
    # after the call above (which presumably consumed it).
    self.ref_len_file.seek(0)

    self.annotation_lines = GENERIC_TRANSCRIPT_ANNOTATION.split("\n")

    ribo_options = dict(
        ribo=self.handle,
        experiment_name="experiment-1",
        alignment_file=self.alignment_file_1,
        reference_name="hg38",
        lengths_file=self.ref_len_file,
        annotation_file=self.annotation_file,
        metagene_radius=METAGENE_RADIUS,
        left_span=LEFT_SPAN,
        right_span=RIGHT_SPAN,
        length_min=LENGTH_MIN,
        length_max=LENGTH_MAX,
        store_coverage=True,
        nprocess=NPROCESS,
        tmp_file_prefix="",
    )
    create.create_ribo(**ribo_options)
    self.handle.close()

    self.sample_ribo = Ribo(self.handle_io)

    # Get the region counts, one attribute per region.
    regions_by_attribute = (
        ("cds_counts", CDS_name),
        ("utr5_counts", UTR5_name),
        ("utr3_counts", UTR3_name),
        ("utr5j_counts", UTR5_JUNCTION_name),
        ("utr3j_counts", UTR3_JUNCTION_name),
    )
    for attribute, region in regions_by_attribute:
        counts = self.sample_ribo.get_region_counts(
            region_name=region,
            sum_references=False,
        )
        setattr(self, attribute, counts)