def test_test_transcripts(self):
     """Check category, reference id and test flag on annotate_test1.gtf.

     Reads the first locus of ``annotate_test1.gtf``, indexes its
     transcripts by ``transcript_id``, runs ``annotate_locus`` with
     ``gtf_sample_attr="sample_id"`` and then verifies the
     ``GTFAttr.CATEGORY``, ``GTFAttr.ANN_REF_ID`` and ``GTFAttr.TEST``
     attributes of transcripts AA through FF.
     """
     transcripts = read_first_locus("annotate_test1.gtf")
     # Index before annotating, in the same order as the checks rely on.
     t_dict = {t.attrs["transcript_id"]: t for t in transcripts}
     annotate_locus(transcripts, gtf_sample_attr="sample_id")

     # assertEqual (rather than assertTrue(a == b)) reports both values
     # when a check fails.
     t = t_dict["AA"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 1)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "B")
     self.assertEqual(t.attrs[GTFAttr.TEST], "0")

     t = t_dict["BB"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "B")
     self.assertEqual(t.attrs[GTFAttr.TEST], "1")

     t = t_dict["CC"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 1)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "C")
     self.assertEqual(t.attrs[GTFAttr.TEST], "1")

     t = t_dict["DD"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 3)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "C")
     self.assertEqual(t.attrs[GTFAttr.TEST], "1")

     t = t_dict["EE"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 5)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "C")
     self.assertEqual(t.attrs[GTFAttr.TEST], "1")

     t = t_dict["FF"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 6)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "C")
     self.assertEqual(t.attrs[GTFAttr.TEST], "1")
 def test_find_best_match(self):
     """Check the best-match annotation on annotate_best_match1.gtf.

     Reads the first locus of ``annotate_best_match1.gtf``, runs
     ``annotate_locus`` with ``gtf_sample_attr="sample_id"`` and verifies,
     for transcripts T1 and T2, the category, the matched reference id and
     the intron / coverage ratios (compared to two decimal places).
     """
     transcripts = read_first_locus("annotate_best_match1.gtf")
     t_dict = {t.attrs["transcript_id"]: t for t in transcripts}
     annotate_locus(transcripts, gtf_sample_attr="sample_id")

     # assertEqual (rather than assertTrue(a == b)) reports both values
     # when a check fails.
     t = t_dict["T1"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "D")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 1.0, places=2)
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.9375, places=2)

     t = t_dict["T2"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "B")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 0.25, places=2)
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.6744, places=2)
 def test_intergenic(self):
     """Check the annotation of transcripts in annotate_intergenic1.gtf.

     Reads the first locus of ``annotate_intergenic1.gtf``, runs
     ``annotate_locus`` with ``gtf_sample_attr="sample_id"`` and verifies
     that transcripts T1 and F both get category 6, reference id ``"na"``,
     zero intron / coverage ratios, a mean recurrence of 2.0 and a mean
     score of 20.0 (floats compared to two decimal places).
     """
     transcripts = read_first_locus("annotate_intergenic1.gtf")
     t_dict = {t.attrs["transcript_id"]: t for t in transcripts}
     annotate_locus(transcripts, gtf_sample_attr="sample_id")

     # Both transcripts are expected to carry exactly the same values, so
     # the checks are shared; T1 is still verified before F.
     for transcript_id in ("T1", "F"):
         t = t_dict[transcript_id]
         # assertEqual (rather than assertTrue(a == b)) reports both
         # values when a check fails.
         self.assertEqual(t.attrs[GTFAttr.CATEGORY], 6)
         self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "na")
         self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 0.0, places=2)
         self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.0, places=2)
         self.assertAlmostEqual(t.attrs[GTFAttr.MEAN_RECURRENCE], 2.0, places=2)
         self.assertAlmostEqual(t.attrs[GTFAttr.MEAN_SCORE], 20.0, places=2)
 def test_categories(self):
     """Check the category assigned to each transcript of annotate_category1.gtf.

     Reads the first locus of ``annotate_category1.gtf``, runs
     ``annotate_locus`` with ``gtf_sample_attr="sample_id"`` and verifies
     ``GTFAttr.CATEGORY`` for T2-T10; for the overlap cases (T7-T10) the
     matched reference id and the intron / coverage ratios are checked as
     well (floats compared to two decimal places).
     """
     transcripts = read_first_locus("annotate_category1.gtf")
     t_dict = {t.attrs["transcript_id"]: t for t in transcripts}
     annotate_locus(transcripts, gtf_sample_attr="sample_id")

     # assertEqual (rather than assertTrue(a == b)) reports both values
     # when a check fails.

     # intronic same strand
     self.assertEqual(t_dict["T2"].attrs[GTFAttr.CATEGORY], 2)
     # intronic opposite strand
     self.assertEqual(t_dict["T3"].attrs[GTFAttr.CATEGORY], 3)
     # intronic ambiguous
     self.assertEqual(t_dict["T6"].attrs[GTFAttr.CATEGORY], 4)
     # interleaving
     self.assertEqual(t_dict["T4"].attrs[GTFAttr.CATEGORY], 5)
     # interleaving
     # NOTE(review): labelled "interleaving" like T4, yet the expected
     # category is 3 (same value as the opposite-strand intronic case
     # above) -- confirm the label against the fixture.
     self.assertEqual(t_dict["T5"].attrs[GTFAttr.CATEGORY], 3)

     # opp strand overlap (no introns)
     t = t_dict["T7"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 1)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "T1")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.2, places=2)

     # same strand overlap (no introns)
     t = t_dict["T8"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "T1")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 0.0, places=2)
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.10, places=2)

     # same strand overlap (with introns)
     t = t_dict["T9"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "T1")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 0.4, places=2)
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 2.0 / 3, places=2)

     # another same strand overlap (with introns)
     t = t_dict["T10"]
     self.assertEqual(t.attrs[GTFAttr.CATEGORY], 0)
     self.assertEqual(t.attrs[GTFAttr.ANN_REF_ID], "T1")
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_INTRON_RATIO], 0.5, places=2)
     self.assertAlmostEqual(t.attrs[GTFAttr.ANN_COV_RATIO], 0.6, places=2)