Ejemplo n.º 1
0
    def setUp(self):
        """Prepare an in-memory HDF5 handle holding the "merzifon" experiment.

        The text fixtures are wrapped in ``StringIO`` objects, the reference
        names/lengths and the annotation are written to the handle, and
        ``create_experiment`` is then run on ``READ_SET_1`` with coverage
        storage turned on.
        """
        experiment = "merzifon"

        # File-like views over the module-level text fixtures.
        self.ref_length_file = StringIO(TRANSCRIPT_LENGTHS)
        self.annotation_file = StringIO(TRANSCRIPT_ANNOTATION)
        self.alignment_file = StringIO(READ_SET_1)
        self.metadata_file = StringIO(METADATA_EXPERIMENT_STR_1)
        self.annotation_lines = TRANSCRIPT_ANNOTATION.split("\n")

        # The HDF5 file is backed by an in-memory BytesIO buffer.
        self.handle = h5py.File(BytesIO(), "w")
        create.initialize(self.handle, experiment, "hg38")
        create.set_reference_names_and_lengths(
            self.handle, self.ref_length_file)
        create.set_annotation(self.handle, self.annotation_lines)

        # Read back what was just written; these feed create_experiment.
        names = get_reference_names(self.handle)
        lengths = get_reference_lengths(self.handle)
        boundaries = get_region_boundaries(self.handle)

        create_experiment.create_experiment(
            ribo_exp_handle=self.handle[EXPERIMENTS_name][experiment],
            experiment_name=experiment,
            alignment_file_handle=self.alignment_file,
            ref_names=names,
            ref_lengths=lengths,
            region_coordinates=boundaries,
            metagene_radius=METAGENE_RADIUS,
            left_span=LEFT_SPAN,
            right_span=RIGHT_SPAN,
            length_min=2,
            length_max=5,
            metadata=self.metadata_file,
            store_coverage=True,
            nprocess=4,
        )
Ejemplo n.º 2
0
    def test_get_reference_names(self):
        """Spot-check the annotation array stored by ``create.set_annotation``.

        After initializing the handle and writing reference lengths and the
        annotation, a handful of cells of the stored annotation dataset are
        compared against known values.
        """
        create.initialize(self.handle, "merzifon",
                          reference_name="appris_human_v2")
        create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
        create.set_annotation(h5_handle=self.handle,
                              annotation_lines=self.annotation_lines)

        annotation = self.handle[REFERENCE_name][REF_ANNOTATION_NAME][...]

        # Each entry is (row, column, expected value).
        spot_checks = (
            (0, 0, 50),
            (1, 0, 0),
            (1, 2, 875),
            (2, 1, 1251),
            (3, 2, 565),
        )
        for row, column, expected in spot_checks:
            self.assertEqual(annotation[row, column], expected)
Ejemplo n.º 3
0
    def test_set_reference_names_and_lengths(self):
        """Verify the reference names and lengths written to the handle.

        Reads the names and lengths datasets back from the reference group
        and checks a few entries plus the total number of names.
        """
        create.initialize(self.handle, "merzifon",
                          reference_name="appris_human_v2")
        create.set_reference_names_and_lengths(self.handle, self.ref_len_file)

        reference_group = self.handle[REFERENCE_name]

        # Names are converted to str before the substring checks.
        names = reference_group[REF_DG_REFERENCE_NAMES][...].astype(str)
        self.assertIn("GAPDH", names[0])
        self.assertIn("BRCA", names[3])
        self.assertEqual(4, len(names))

        lengths = reference_group[REF_DG_REFERENCE_LENGTHS][...]
        self.assertEqual(875, lengths[1])
        self.assertEqual(1462, lengths[2])
        self.assertNotEqual(1462, lengths[0])
Ejemplo n.º 4
0
    def test_set_coverage_vectors(self):
        """Check the start/stop site coverage vectors stored in the handle.

        The handle is initialized, reference lengths and the annotation are
        written, and ``create.set_coverage_vectors`` is called with ``5`` as
        its second argument. The resulting start-site and stop-site datasets
        are then compared against the expected 11-element vectors.
        """
        create.initialize(self.handle,
                          "merzifon",
                          reference_name="appris_human_v2")

        create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
        create.set_annotation(h5_handle=self.handle,
                              annotation_lines=self.annotation_lines)

        create.set_coverage_vectors(self.handle, 5)

        reference_group = self.handle[REFERENCE_name]
        start_site_cov = reference_group[REF_DG_START_SITE_COV][...]
        stop_site_cov = reference_group[REF_DG_STOP_SITE_COV][...]

        self.assertListEqual(list(start_site_cov),
                             [2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4])
        self.assertListEqual(list(stop_site_cov),
                             [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3])
        # Negative control: assertNotEqual reports both operands on failure,
        # unlike assertTrue(a != b), which only says "False is not true".
        self.assertNotEqual(list(stop_site_cov),
                            [6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3])
Ejemplo n.º 5
0
    def test_start_site_coverage_from_ribo(self):
        """Compare computed start-site coverage with the expected fixture.

        Coverage is computed from ``READ_SET_1`` with ``find_coverage`` and
        reduced with ``find_site_coverage`` (radius 2, site type "start").
        The result must equal ``ACTUAL_START_SITE_COVERAGE`` element-wise.
        """
        create.initialize(self.handle, "merzifon",
                          reference_name="appris_human_v2")
        create.set_reference_names_and_lengths(self.handle, self.ref_len_file)
        create.set_annotation(
            h5_handle=self.handle,
            annotation_lines=TRANSCRIPT_ANNOTATION.split("\n"),
        )

        # Read back the stored annotation and reference information.
        boundaries = get_region_boundaries(self.handle)
        names = get_reference_names(self.handle)
        lengths = get_reference_lengths(self.handle)

        reads = StringIO(READ_SET_1)
        coverage = find_coverage(reads, names, lengths)

        site_coverage = find_site_coverage(
            coverage=coverage,
            radius=2,
            annotation=boundaries,
            site_type="start",
        )

        self.assertTrue(np.all(ACTUAL_START_SITE_COVERAGE == site_coverage))
Ejemplo n.º 6
0
 def test_init(self):
     """Check that ``create.initialize`` adds the expected entries.

     After initialization, the experiments group must contain "merzifon"
     and the top level of the handle must contain the reference group.
     """
     create.initialize(self.handle, "merzifon",
                       reference_name="appris_human_v2")

     experiment_names = self.handle[EXPERIMENTS_name].keys()
     self.assertIn("merzifon", experiment_names)

     top_level_names = self.handle.keys()
     self.assertIn(REFERENCE_name, top_level_names)
Ejemplo n.º 7
0
    def setUp(self):
        """Create an in-memory ribo file and cache its per-region counts.

        A ribo file for "experiment-1" is written into ``self.handle_io``
        through ``create.create_ribo``, reopened as a ``Ribo`` object, and
        the region counts of every region (CDS, both UTRs, both junctions)
        are stored on the test instance with ``sum_references=False``.
        """
        self.tmp_files = []

        # File-like views over the module-level text fixtures.
        self.ref_len_file = StringIO(GENERIC_TRANSCRIPT_LENGTHS)
        self.annotation_file = StringIO(GENERIC_TRANSCRIPT_ANNOTATION)
        self.alignment_file_1 = StringIO(READ_SET_1)
        self.alignment_file_2 = StringIO(READ_SET_2)

        # handle_io is kept so the written bytes can be reopened below.
        self.handle_io = BytesIO()
        self.handle = h5py.File(self.handle_io, "w")
        self.handle_2 = h5py.File(BytesIO(), "w")
        self.h5_handle = h5py.File(BytesIO(), "w")

        initialize(self.h5_handle,
                   experiment_name="dummy",
                   reference_name="test_ref")

        # NOTE(review): the returned names/lengths are not used in the
        # visible part of this method -- confirm whether they are needed.
        ref_names, ref_lengths = set_reference_names_and_lengths(
            self.h5_handle, self.ref_len_file)
        # Rewind: the same lengths file is handed to create_ribo below.
        self.ref_len_file.seek(0)

        self.annotation_lines = GENERIC_TRANSCRIPT_ANNOTATION.split("\n")

        create.create_ribo(
            ribo=self.handle,
            experiment_name="experiment-1",
            alignment_file=self.alignment_file_1,
            reference_name="hg38",
            lengths_file=self.ref_len_file,
            annotation_file=self.annotation_file,
            metagene_radius=METAGENE_RADIUS,
            left_span=LEFT_SPAN,
            right_span=RIGHT_SPAN,
            length_min=LENGTH_MIN,
            length_max=LENGTH_MAX,
            store_coverage=True,
            nprocess=NPROCESS,
            tmp_file_prefix="",
        )

        # Close the writer before reopening the same buffer as a Ribo object.
        self.handle.close()
        self.sample_ribo = Ribo(self.handle_io)

        def counts_for(region):
            # Per-reference (unsummed) counts for a single region.
            return self.sample_ribo.get_region_counts(
                region_name=region,
                sum_references=False)

        self.cds_counts = counts_for(CDS_name)
        self.utr5_counts = counts_for(UTR5_name)
        self.utr3_counts = counts_for(UTR3_name)
        self.utr5j_counts = counts_for(UTR5_JUNCTION_name)
        self.utr3j_counts = counts_for(UTR3_JUNCTION_name)