Ejemplo n.º 1
0
 def test_b3_seq(self):
     """Check that ``self.bed3`` is valid when built with the FASTA index.

     If the entry is flagged as invalid, the failure message carries the
     entry length, the length of the indexed sequence, the reported
     reason, the thick coordinates and the CDS length modulo three.
     """
     orf = bed12.BED12(self.bed3,
                       fasta_index=self.index,
                       transcriptomic=True)
     is_invalid = orf.invalid
     diagnostics = (
         len(orf),
         len(self.index[orf.id]),
         orf.invalid_reason,
         (orf.thick_end, orf.thick_start),
         (orf.thick_end - orf.thick_start + 1) % 3,
     )
     self.assertFalse(is_invalid, diagnostics)
Ejemplo n.º 2
0
    def test_3_partial(self):
        """Check that a 3'-partial ORF line is accepted as valid.

        The BED12 line is labelled ``3prime_partial`` in its name field and
        its thick end coincides with the end of the feature (1060).
        """
        fields = [
            'class_Chr1.1004.0', '0', '1060',
            'ID=class_Chr1.1004.0|m.22214;class_Chr1.1004.0|g.22214;ORF_class_Chr1.1004.0|g.22214_class_Chr1.1004.0|m.22214_type:3prime_partial_len:300_(+)',
            '0', '+', '162', '1060', '0', '1', '1060', '0',
        ]
        orf = bed12.BED12("\t".join(fields), transcriptomic=True)
        self.assertFalse(orf.invalid, orf.invalid_reason)
Ejemplo n.º 3
0
    def test_regression(self):

        sequence = """TC
CTCACAGTTACTATAAGCTCGTCT
ATGGCCAGAGACGGTGGTGTTTCTTGTTTACGAA
GGTCGGAGATGATGAGCGTCGGTGGTATCGGAGGAATTGAATCTGCGCCGTTGGATTTAG
ATGAAGTTCATGTCTTAGCCGTTGATGACAGTCTCGTTGATCGTATTGTCATCGAGAGAT
TGCTTCGTATTACTTCCTGCAAAGTTACGGCGGTAGATAGTGGATGGCGTGCTCTGGAAT
TTCTAGGGTTAGATAATGAGAAAGCTTCTGCTGAATTCGATAGATTGAAAGTTGATTTGA
TCATCACTGATTACTGTATGCCTGGAATGACTGGTTATGAGCTTCTCAAGAAGATTAAGG
AATCGTCCAATTTCAGAGAAGTTCCGGTTGTAATCATGTCGTCGGAGAATGTATTGACCA
GAATCGACAGATGCCTTGAGGAAGGTGCTCAAGATTTCTTATTGAAACCGGTGAAACTCG
CCGACGTGAAACGTCTGAGAAGTCATTTAACTAAAGACGTTAAACTTTCCAACGGAAACA
AACGGAAGCTTCCGGAAGATTCTAGTTCCGTTAACTCTTCGCTTCCTCCACCGTCACCTC
CGTTGACTATCTCGCCTGA"""

        record = SeqRecord.SeqRecord(Seq.Seq(sub("\n", "", sequence)),
                                     id="class_Chr1.1006.0")
        index = {record.id: record}

        line = "\t".join([
            'class_Chr1.1006.0', '0', '619',
            'ID=class_Chr1.1006.0|m.22308;class_Chr1.1006.0|g.22308;ORF_class_Chr1.1006.0|g.22308_class_Chr1.1006.0|m.22308_type:internal_len:206_(+)',
            '0', '+', '2', '617', '0', '1', '619', '0'
        ])

        # Now we are going back to find the start codon
        bed_line = bed12.BED12(line,
                               transcriptomic=True,
                               fasta_index=index,
                               max_regression=0.2)
        self.assertFalse(bed_line.invalid, bed_line.invalid_reason)
        self.assertEqual(bed_line.phase, 0)
        # Start codon in frame found at location 27
        self.assertEqual(bed_line.thick_start, 27)
        self.assertTrue(bed_line.has_start_codon)
        self.assertFalse(bed_line.has_stop_codon)

        lines = """Chr1	CLASS	transcript	3442811	3443785	1000	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; exon_number "1"; Abundance "22.601495"; canonical_proportion "1.0";
Chr1	CLASS	exon	3442811	3442999	.	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";
Chr1	CLASS	exon	3443099	3443169	.	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";
Chr1	CLASS	exon	3443252	3443329	.	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";
Chr1	CLASS	exon	3443417	3443493	.	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";
Chr1	CLASS	exon	3443582	3443785	.	-	.	gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";"""

        lines = [GTF.GtfLine(_) for _ in lines.split("\n") if _]

        transcript = Transcript(lines[0])
        transcript.add_exons(lines[1:])
        transcript.finalize()
        transcript.load_orfs([bed_line])
        self.assertTrue(transcript.is_coding)
        self.assertTrue(transcript.has_start_codon)
        self.assertFalse(transcript.has_stop_codon)
        self.assertEqual(transcript.selected_cds_end, transcript.start)
        self.assertEqual(transcript.selected_cds_start, transcript.end - 26)
Ejemplo n.º 4
0
    def test_relocation(self):
        """Check the thick start and phase of ``self.bed_row`` after loading.

        The entry is built against the FASTA index with
        ``max_regression=0.3``; its thick start is expected at 195 with
        phase 0.
        """
        relocated = bed12.BED12(self.bed_row,
                                transcriptomic=True,
                                max_regression=0.3,
                                fasta_index=self.index)

        self.assertEqual(relocated.thick_start, 195)
        self.assertEqual(relocated.phase, 0)
Ejemplo n.º 5
0
 def test_b4_seq(self):
     """Build ``self.bed4`` against the FASTA index and check its ORF flags.

     The entry must be valid and must report both a start and a stop
     codon.
     """
     orf = bed12.BED12(self.bed4,
                       fasta_index=self.index,
                       transcriptomic=True)
     is_invalid = orf.invalid
     diagnostics = (len(orf), orf.invalid_reason, len(self.index[orf.id]))
     self.assertFalse(is_invalid, diagnostics)

     self.assertTrue(orf.has_start_codon)
     self.assertTrue(orf.has_stop_codon)

     # NOTE(review): assertTrue() treats its second argument as the failure
     # message, so the three calls below only check that each value is
     # truthy; the numbers are never compared. assertEqual was probably
     # intended, but the literals need confirming against the fixture
     # first: test_b4 expects cds_len == 1115 - 641 for the same self.bed4,
     # which disagrees with 1112 - 641 here.
     self.assertTrue(orf.thick_start, 641)
     self.assertTrue(orf.thick_end, 1112)
     self.assertTrue(orf.cds_len, 1112 - 641)
Ejemplo n.º 6
0
 def test_b2_seq_no_start(self):
     """Check that ``self.bed2`` has no start codon with ``max_regression=0``.

     Also verifies that the three bases at 0-based offsets 769..771 of the
     indexed sequence are not a stop codon, and that the entry starts at 1
     and is 809 long. If a start codon is reported, the failure message
     includes the thick coordinates, the raw BED thick columns and the
     translation of the in-frame CDS slice.
     """
     stop_codons = ("TAG", "TGA", "TAA")
     orf = bed12.BED12(self.bed2,
                       max_regression=0,
                       fasta_index=self.index,
                       transcriptomic=True)

     downstream_triplet = str(self.index[orf.chrom][766 + 3:766 + 6].seq)
     self.assertNotIn(downstream_triplet, stop_codons)

     self.assertEqual(orf.start, 1)
     self.assertEqual(len(orf), 809)

     has_start = orf.has_start_codon
     raw_thick_columns = self.bed2.split("\t")[6:8]
     # Shift the slice start by a phase-dependent offset so that the
     # translation shown in the failure message is read in frame.
     slice_from = orf.thick_start + (3 - orf.phase - 1) % 3 - 1
     translation = self.index[orf.chrom][slice_from:orf.thick_end].seq.translate()
     self.assertFalse(
         has_start,
         (orf.thick_start, orf.thick_end, raw_thick_columns, translation))
Ejemplo n.º 7
0
    def test_b1_seq(self):
        """Check ``self.bed1`` when it is built against the FASTA index.

        Verifies that the three bases at 0-based offsets 389..391 of the
        indexed sequence are a stop codon, that the entry starts at 1 and
        is 784 long, that the CDS opens with ATG (both in the sequence and
        in ``start_codon``), that the thick region spans 30..386, and that
        a stop codon is reported.
        """
        stop_codons = ("TAG", "TGA", "TAA")
        orf = bed12.BED12(self.bed1,
                          fasta_index=self.index,
                          transcriptomic=True)

        downstream_triplet = str(self.index[orf.chrom][386 + 3:386 + 6].seq)
        self.assertIn(downstream_triplet, stop_codons)

        self.assertEqual(orf.start, 1)
        self.assertEqual(len(orf), 784)

        # thick_start is 1-based, hence the -1 when slicing the sequence.
        codon_from = orf.thick_start - 1
        codon_to = orf.thick_start + 2
        first_codon = str(self.index[orf.chrom][codon_from:codon_to].seq)
        self.assertEqual("ATG", first_codon, first_codon)

        reported_codon = orf.start_codon
        self.assertEqual("ATG", reported_codon, reported_codon)
        self.assertEqual(orf.thick_start, 30)
        self.assertEqual(orf.thick_end, 386)

        self.assertTrue(orf.has_stop_codon)
Ejemplo n.º 8
0
 def test_b4(self):
     """Check ``self.bed4`` parsed in transcriptomic mode without a FASTA index.

     The entry must be valid, start at 1, be 3604 long and have a CDS
     length of 1115 - 641.
     """
     expected_cds_len = 1115 - 641
     orf = bed12.BED12(self.bed4, transcriptomic=True)

     self.assertFalse(orf.invalid)
     self.assertEqual(orf.start, 1)
     self.assertEqual(len(orf), 3604)
     self.assertEqual(orf.cds_len,
                      expected_cds_len,
                      (orf.cds_len, expected_cds_len))
Ejemplo n.º 9
0
 def test_b3(self):
     """Check ``self.bed3`` parsed in transcriptomic mode without a FASTA index.

     The entry must be valid, start at 1 and be 3683 long.
     """
     orf = bed12.BED12(self.bed3, transcriptomic=True)

     self.assertFalse(orf.invalid)
     self.assertEqual(orf.start, 1)
     self.assertEqual(len(orf), 3683)
Ejemplo n.º 10
0
 def test_b2(self):
     """Check ``self.bed2`` parsed in transcriptomic mode without a FASTA index.

     The entry must start at 1, be 809 long and have its thick region
     spanning 2..766.
     """
     orf = bed12.BED12(self.bed2, transcriptomic=True)

     # Overall extent of the entry.
     self.assertEqual(orf.start, 1)
     self.assertEqual(len(orf), 809)
     # Thick (coding) region boundaries.
     self.assertEqual(orf.thick_start, 2)
     self.assertEqual(orf.thick_end, 766)
Ejemplo n.º 11
0
 def test_b1(self):
     """Check ``self.bed1`` parsed in transcriptomic mode without a FASTA index.

     The entry must start at 1, be 784 long and have its thick region
     spanning 30..386.
     """
     orf = bed12.BED12(self.bed1, transcriptomic=True)

     # Overall extent of the entry.
     self.assertEqual(orf.start, 1)
     self.assertEqual(len(orf), 784)
     # Thick (coding) region boundaries.
     self.assertEqual(orf.thick_start, 30)
     self.assertEqual(orf.thick_end, 386)