Esempio n. 1
0
    def test_simple_split(self):
        """
        Check that the original transcript can be split in two.

        With the BLAST check switched off, ``split_by_cds`` is expected to
        yield two transcripts that together span the original one from start
        to end. A Superlocus built on the same transcript is expected to go
        from one transcript to two once its transcript data is loaded.
        :return:
        """

        configuration = self.transcript.configuration
        self.assertIsNotNone(configuration)
        configuration.pick.chimera_split.blast_check = False

        pieces = list(splitting.split_by_cds(self.transcript))

        # On failure, report the transcripts that were actually produced.
        self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
        first, last = pieces[0], pieces[1]
        self.assertEqual(first.start, self.transcript.start)
        self.assertEqual(last.end, self.transcript.end)

        superlocus = loci.Superlocus(
            self.transcript,
            configuration=self.transcript.configuration)
        self.assertFalse(
            superlocus.configuration.pick.chimera_split.blast_check)
        self.assertEqual(len(superlocus.transcripts), 1)
        superlocus.logger.setLevel("DEBUG")
        superlocus.load_all_transcript_data()
        self.assertEqual(len(superlocus.transcripts), 2)
Esempio n. 2
0
    def test_spanning_hit_lenient(self):
        """
        Use the hit from ``get_spanning_hit`` as the only BLAST hit, enable
        the BLAST check with LENIENT leniency, and expect ``split_by_cds``
        to yield a single transcript.
        """
        self.transcript.blast_hits = [self.get_spanning_hit()]

        split_conf = self.transcript.json_conf["pick"]["chimera_split"]
        split_conf["blast_check"] = True
        split_conf["blast_params"]["leniency"] = "LENIENT"

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(1, len(pieces))
Esempio n. 3
0
    def test_lenient_split_twohits(self):
        """
        Append the hit from ``get_second_hit`` to the transcript's BLAST
        hits, enable the BLAST check with LENIENT leniency, and expect
        ``split_by_cds`` to yield two transcripts.
        """
        second_hit = self.get_second_hit()
        self.transcript.blast_hits.append(second_hit)

        split_conf = self.transcript.json_conf["pick"]["chimera_split"]
        split_conf["blast_check"] = True
        split_conf["blast_params"]["leniency"] = "LENIENT"

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))
Esempio n. 4
0
    def testPositive(self):
        """
        Load two "+"-strand ORFs, built by hand as BED12 objects, onto the
        fixture transcript and check that ``split_by_cds`` yields two
        transcripts spanning the original from start to end.
        """

        # First ORF: thick region 1-3002, phase 2, stop codon but no start codon.
        self.bed1 = parsers.bed12.BED12()
        # NOTE(review): this sets ``header`` on the test case, not on the
        # BED12 object - possibly ``self.bed1.header`` was intended; confirm.
        self.header = False
        self.bed1.chrom = "transcript1"
        self.bed1.start = 1
        self.bed1.end = 5000
        self.bed1.name = "Bed1"
        self.bed1.score = 0
        self.bed1.strand = "+"
        self.bed1.thick_start = 1
        self.bed1.thick_end = 3002
        self.bed1.phase = 2
        self.bed1.block_counts = 1
        self.bed1.block_sizes = [3002]
        self.bed1.block_starts = [1]
        self.bed1.transcriptomic = True
        self.bed1.has_start_codon = False
        self.bed1.has_stop_codon = True
        # On failure, the reason reported by the BED12 object is the message.
        self.assertFalse(self.bed1.invalid, self.bed1.invalid_reason)

        # Second ORF: thick region 4001-4900, with both start and stop codons.
        self.bed2 = parsers.bed12.BED12()
        # NOTE(review): same as above - sets ``header`` on the test case, not
        # on ``self.bed2``; confirm intent.
        self.header = False
        self.bed2.chrom = "transcript1"
        self.bed2.start = 1
        self.bed2.end = 5000
        self.bed2.name = "Bed2"
        self.bed2.score = 0
        self.bed2.strand = "+"
        self.bed2.thick_start = 4001
        self.bed2.thick_end = 4900
        self.bed2.block_counts = 1
        self.bed2.block_sizes = [900]
        self.bed2.block_starts = [4001]
        self.bed2.transcriptomic = True
        self.bed2.has_start_codon = True
        self.bed2.has_stop_codon = True
        self.assertFalse(self.bed2.invalid)

        # Both ORFs must be retained as separate internal ORFs before splitting.
        self.transcript.load_orfs([self.bed1, self.bed2])
        self.assertTrue(self.transcript.is_coding)
        self.assertEqual(self.transcript.number_internal_orfs, 2,
                         str(self.transcript))
        self.transcript.finalize()

        # with self.assertLogs("test_mono", level="DEBUG") as log_split:
        new_transcripts = list(splitting.split_by_cds(self.transcript))

        # self.assertIn("DEBUG:test_mono:",
        #               log_split.output)
        # On failure, report the transcripts that were actually produced.
        self.assertEqual(len(new_transcripts), 2,
                         "\n".join(str(_) for _ in new_transcripts))
        # The two pieces must cover the original extremes.
        self.assertEqual(new_transcripts[0].start, self.transcript.start)
        self.assertEqual(new_transcripts[1].end, self.transcript.end)
Esempio n. 5
0
    def test_spanning_hit_nocheck(self):
        """
        Use the hit from ``get_spanning_hit`` as the only BLAST hit but
        disable the BLAST check: the transcript carries two distinct CDS
        boundaries and ``split_by_cds`` is expected to yield two transcripts.
        """
        self.transcript.blast_hits = [self.get_spanning_hit()]
        self.transcript.json_conf["pick"]["chimera_split"]["blast_check"] = False

        # Index the loaded ORFs by their (thick_start, thick_end) boundaries.
        ordered_orfs = sorted(
            self.transcript.loaded_bed12,
            key=operator.attrgetter("thick_start", "thick_end"))
        boundaries = SortedDict()
        for bed in ordered_orfs:
            boundaries[bed.thick_start, bed.thick_end] = [bed]

        self.assertEqual(len(boundaries), 2)
        self.assertEqual(self.transcript.number_internal_orfs, 2)

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))
Esempio n. 6
0
    def test_deleted_hits(self):
        """
        Remove the ``blast_hits`` attribute from the transcript and check
        that ``split_by_cds`` still yields two transcripts, while a warning
        about the lost BLAST hits store is logged.
        """
        del self.transcript.blast_hits

        split_conf = self.transcript.json_conf["pick"]["chimera_split"]
        split_conf["blast_check"] = True
        split_conf["blast_params"]["leniency"] = "LENIENT"
        self.transcript.logger = self.logger

        with self.assertLogs("null", level="WARNING") as log_split:
            pieces = list(splitting.split_by_cds(self.transcript))
            self.assertEqual(2, len(pieces))

        expected_warning = "WARNING:null:BLAST hits store lost for transcript1! Creating a mock one to avoid a crash"
        self.assertIn(expected_warning, log_split.output)
Esempio n. 7
0
    def test_deleted_hits(self):
        """
        Remove the ``blast_hits`` attribute from the transcript and check
        that ``split_by_cds`` still yields two transcripts, while a warning
        about the lost BLAST hits store is emitted on the test logger.
        """
        del self.transcript.blast_hits

        split_conf = self.transcript.configuration.pick.chimera_split
        split_conf.blast_check = True
        split_conf.blast_params.leniency = "LENIENT"
        self.transcript.logger = self.logger

        with self.assertLogs(logger=self.logger, level="WARNING") as log_split:
            pieces = list(splitting.split_by_cds(self.transcript))
            self.assertEqual(2, len(pieces))

        expected_warning = "WARNING:test_mono:BLAST hits store lost for transcript1! Creating a mock one to avoid a crash"
        self.assertIn(expected_warning, log_split.output)
Esempio n. 8
0
    def test_spanning_hit_lenient(self):
        """
        Use the hit from ``get_spanning_hit`` as the only BLAST hit, enable
        the BLAST check with LENIENT leniency, and expect no split: one
        transcript from ``split_by_cds`` and one transcript in the Superlocus
        both before and after loading its transcript data.
        """
        self.transcript.blast_hits = [self.get_spanning_hit()]

        split_conf = self.transcript.configuration.pick.chimera_split
        split_conf.blast_check = True
        split_conf.blast_params.leniency = "LENIENT"

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(1, len(pieces))

        superlocus = loci.Superlocus(
            self.transcript,
            configuration=self.transcript.configuration)
        self.assertEqual(len(superlocus.transcripts), 1)
        superlocus.load_all_transcript_data()
        self.assertEqual(len(superlocus.transcripts), 1)
Esempio n. 9
0
    def test_lenient_split_twohits(self):
        """
        Append the hit from ``get_second_hit`` to the transcript's BLAST
        hits, enable the BLAST check with LENIENT leniency, and expect a
        split: two transcripts from ``split_by_cds``, and a Superlocus going
        from one transcript to two once its transcript data is loaded.
        """
        second_hit = self.get_second_hit()
        self.transcript.blast_hits.append(second_hit)

        split_conf = self.transcript.configuration.pick.chimera_split
        split_conf.blast_check = True
        split_conf.blast_params.leniency = "LENIENT"

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))

        superlocus = loci.Superlocus(
            self.transcript,
            configuration=self.transcript.configuration)
        self.assertEqual(len(superlocus.transcripts), 1)
        superlocus.load_all_transcript_data()
        self.assertEqual(len(superlocus.transcripts), 2)
Esempio n. 10
0
    def test_no_hsps(self):
        """
        Give the transcript an empty list of BLAST hits with the BLAST check
        enabled and LENIENT leniency, and expect a split: two transcripts
        from ``split_by_cds``, and a Superlocus going from one transcript to
        two once its transcript data is loaded.
        """
        self.transcript.blast_hits = []

        split_conf = self.transcript.configuration.pick.chimera_split
        split_conf.blast_check = True
        split_conf.blast_params.leniency = "LENIENT"

        # Run the split with a dedicated logger set to DEBUG.
        debug_logger = utilities.log_utils.create_default_logger("test_no_hsps")
        debug_logger.setLevel("DEBUG")
        self.transcript.logger = debug_logger

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))

        superlocus = loci.Superlocus(
            self.transcript,
            configuration=self.transcript.configuration)
        self.assertEqual(len(superlocus.transcripts), 1)
        superlocus.load_all_transcript_data()
        self.assertEqual(len(superlocus.transcripts), 2)
Esempio n. 11
0
    def test_spanning_hit_nocheck(self):
        """
        Use the hit from ``get_spanning_hit`` as the only BLAST hit but
        disable the BLAST check: the transcript carries two distinct CDS
        boundaries, ``split_by_cds`` is expected to yield two transcripts,
        and a Superlocus is expected to go from one transcript to two once
        its transcript data is loaded.
        """
        self.transcript.blast_hits = [self.get_spanning_hit()]
        self.transcript.configuration.pick.chimera_split.blast_check = False

        # Index the loaded ORFs by their (thick_start, thick_end) boundaries.
        ordered_orfs = sorted(
            self.transcript.loaded_bed12,
            key=operator.attrgetter("thick_start", "thick_end"))
        boundaries = SortedDict()
        for bed in ordered_orfs:
            boundaries[bed.thick_start, bed.thick_end] = [bed]

        self.assertEqual(len(boundaries), 2)
        self.assertEqual(self.transcript.number_internal_orfs, 2)

        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))

        superlocus = loci.Superlocus(
            self.transcript,
            configuration=self.transcript.configuration)
        self.assertEqual(len(superlocus.transcripts), 1)
        superlocus.load_all_transcript_data()
        self.assertEqual(len(superlocus.transcripts), 2)
Esempio n. 12
0
 def test_no_splitting_by_source(self):
     """
     Check splitting against the ``chimera_split.skip`` list of sources.

     For each candidate skip list, one transcript is expected when the
     transcript's source is a member of the list and two otherwise, both
     from ``split_by_cds`` and in a Superlocus after loading its data.
     """
     self.transcript.source = "foo"
     # NOTE(review): the last case nests the source inside an inner list, so
     # the membership test below is False for it and two transcripts are
     # expected - confirm the nesting is intentional.
     candidate_skips = [[], [self.transcript.source], ["bar"],
                        ["bar", [self.transcript.source]]]
     for sources in candidate_skips:
         with self.subTest(sources=sources):
             self.transcript.configuration.pick.chimera_split.skip = sources
             expected = 1 if self.transcript.source in sources else 2

             pieces = list(splitting.split_by_cds(self.transcript))
             self.assertEqual(expected, len(pieces))

             superlocus = loci.Superlocus(
                 self.transcript,
                 configuration=self.transcript.configuration)
             superlocus.load_all_transcript_data()
             self.assertEqual(len(superlocus.transcripts), expected)
Esempio n. 13
0
    def test_simple_split(self):

        """
        Check that the original transcript can be split in two.

        With the BLAST check switched off, ``split_by_cds`` is expected to
        yield two transcripts that together span the original one from start
        to end.
        :return:
        """

        json_conf = self.transcript.json_conf
        self.assertIsNotNone(json_conf)
        self.assertIn("pick", json_conf)
        json_conf["pick"]["chimera_split"]["blast_check"] = False

        pieces = list(splitting.split_by_cds(self.transcript))

        # On failure, report the transcripts that were actually produced.
        self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
        first, last = pieces[0], pieces[1]
        self.assertEqual(first.start, self.transcript.start)
        self.assertEqual(last.end, self.transcript.end)
0
    def testNegative(self):
        """
        Load two "-"-strand ORFs, built by hand as BED12 objects, onto the
        fixture transcript and check that ``split_by_cds`` yields two
        transcripts, that the transcript's CDS coordinates match the expected
        values, and that a Superlocus goes from one transcript to two once
        its transcript data is loaded.
        """
        # First ORF: thick region 1-3000, phase 0, with start and stop codons.
        self.bed1 = parsers.bed12.BED12()
        # NOTE(review): this sets ``header`` on the test case, not on the
        # BED12 object - possibly ``self.bed1.header`` was intended; confirm.
        self.header = False
        self.bed1.chrom = "transcript1"
        self.bed1.start = 1
        self.bed1.end = 5000
        self.bed1.name = "Bed1"
        self.bed1.score = 0
        self.bed1.strand = "-"
        self.bed1.thick_start = 1
        self.bed1.thick_end = 3000
        self.bed1.phase = 0
        self.bed1.block_counts = 1
        self.bed1.block_sizes = [3001]
        self.bed1.block_starts = [1]
        self.bed1.transcriptomic = True
        self.bed1.has_start_codon = True
        self.bed1.has_stop_codon = True
        # On failure, the reason reported by the BED12 object is the message.
        self.assertFalse(self.bed1.invalid, self.bed1.invalid_reason)

        # Second ORF: thick region 4001-5000, phase 1, stop codon but no
        # start codon; built with a dedicated DEBUG logger.
        logger = create_default_logger("testNegative", "DEBUG")
        self.bed2 = parsers.bed12.BED12(logger=logger)
        # NOTE(review): same as above - sets ``header`` on the test case, not
        # on ``self.bed2``; confirm intent.
        self.header = False
        self.bed2.chrom = "transcript1"
        self.bed2.start = 1
        self.bed2.end = 5000
        self.bed2.name = "Bed2"
        self.bed2.score = 0
        self.bed2.strand = "-"
        self.bed2.thick_start = 4001
        self.bed2.thick_end = 5000
        self.bed2.phase = 1
        self.bed2.block_counts = 1
        self.bed2.block_sizes = [1000]
        self.bed2.block_starts = [4001]
        self.bed2.transcriptomic = True
        self.bed2.has_start_codon = False
        self.bed2.has_stop_codon = True
        # On failure, report both the phase and the invalidity reason.
        self.assertFalse(self.bed2.invalid,
                         (self.bed2.phase, self.bed2.invalid_reason))

        # Both ORFs must be retained as separate internal ORFs, before and
        # after finalizing the transcript.
        self.transcript.load_orfs([self.bed1, self.bed2])
        self.assertTrue(self.transcript.is_coding)
        self.assertEqual(self.transcript.number_internal_orfs, 2,
                         str(self.transcript))
        self.transcript.finalize()

        self.assertEqual(self.transcript.number_internal_orfs, 2,
                         str(self.transcript))

        # with self.assertLogs("test_mono", level="DEBUG") as log_split:
        new_transcripts = list(splitting.split_by_cds(self.transcript))

        # On failure, report the transcripts that were actually produced.
        self.assertEqual(len(new_transcripts), 2,
                         "\n".join(str(_) for _ in new_transcripts))
        # The two pieces must cover the original extremes.
        self.assertEqual(new_transcripts[0].start, self.transcript.start)
        self.assertEqual(new_transcripts[1].end, self.transcript.end)

        # Expected CDS coordinates on the original transcript. The fixture
        # transcript is defined outside this block - presumably these are
        # genomic coordinates derived from it; verify against setUp.
        self.assertEqual(self.transcript.combined_cds_start, 6000)
        self.assertEqual(self.transcript.combined_cds_end, 1001)
        self.assertEqual(self.transcript.selected_cds_end, 1001)
        self.assertEqual(self.transcript.selected_cds_start, 4000)
        self.assertEqual(self.transcript.strand, "-")

        # The Superlocus holds one transcript until its data is loaded.
        sl = loci.Superlocus(self.transcript,
                             configuration=self.transcript.configuration)
        self.assertEqual(len(sl.transcripts), 1)
        sl.load_all_transcript_data()
        self.assertEqual(len(sl.transcripts), 2)
Esempio n. 15
0
 def test_one_orf(self):
     """
     Strip the CDS from the transcript, load ``self.bed1`` as its only ORF,
     and expect ``split_by_cds`` to yield a single transcript.
     """
     self.transcript.strip_cds()
     self.transcript.load_orfs([self.bed1])

     pieces = list(splitting.split_by_cds(self.transcript))
     self.assertEqual(1, len(pieces))