def test_trim_bidir(self):
    """Check the nodes trim_graph returns for trim_bidir1.gtf on both strands.

    Three ``trim_utr_fraction`` thresholds are applied (with
    ``trim_intron_fraction`` fixed at 0.0), first with the graph built for
    POS_STRAND and then again after every transcript has been reassigned
    to NEG_STRAND. The expected node sets are identical for both strands.
    """
    transcripts = read_first_locus("trim_bidir1.gtf", score_attr="FPKM")
    # (trim_utr_fraction, expected trimmed nodes), applied in this order
    cases = (
        (0.015, {Exon(0, 100), Exon(900, 1000)}),
        (0.11, {Exon(0, 100), Exon(900, 1000),
                Exon(100, 200), Exon(800, 900)}),
        (0.26, {Exon(0, 100), Exon(900, 1000),
                Exon(100, 200), Exon(800, 900),
                Exon(200, 300), Exon(700, 800)}),
    )

    def check_strand(strand):
        # rebuild the graphs so they reflect the transcripts' current strand
        G, _tmap = get_transcript_graphs(transcripts)[strand]
        for utr_fraction, expected in cases:
            trim_nodes = trim_graph(G, strand,
                                    min_trim_length=0,
                                    trim_utr_fraction=utr_fraction,
                                    trim_intron_fraction=0.0)
            # assertEqual instead of assertTrue(a == b); the message
            # names the case that failed inside the loop
            self.assertEqual(
                trim_nodes, expected,
                "trim_utr_fraction=%r, strand=%r" % (utr_fraction, strand))

    check_strand(POS_STRAND)
    # flip sign of transcripts and try again
    for t in transcripts:
        t.strand = NEG_STRAND
    check_strand(NEG_STRAND)
def test_trim_intron_retention(self):
    """Check trim_graph on trim_intron_retention1.gtf at four thresholds.

    ``trim_intron_fraction`` is varied while ``trim_utr_fraction`` stays at
    0.0; only the POS_STRAND graph is examined.
    """
    # NOTE(review): a second definition named test_trim_intron_retention
    # appears later in the visible source; if both live in the same class
    # the later one replaces this one -- confirm and drop one of them.
    transcripts = read_first_locus("trim_intron_retention1.gtf",
                                   score_attr="FPKM")
    G, _tmap = get_transcript_graphs(transcripts)[POS_STRAND]
    # (trim_intron_fraction, expected trimmed nodes), applied in this order
    cases = (
        (0.01, set()),
        (0.11, {Exon(500, 1500)}),
        (0.21, {Exon(500, 1500), Exon(2000, 9000)}),
        (1.0, {Exon(500, 1500), Exon(2000, 9000)}),
    )
    # trim at different thresholds
    for intron_fraction, expected in cases:
        trim_nodes = trim_graph(G, POS_STRAND,
                                min_trim_length=0,
                                trim_utr_fraction=0.0,
                                trim_intron_fraction=intron_fraction)
        # assertEqual instead of assertTrue(a == b); the message names
        # the threshold that failed
        self.assertEqual(trim_nodes, expected,
                         "trim_intron_fraction=%r" % (intron_fraction,))
def test_assembler1(self):
    """Assemble assemble1.gtf under two score/kmax settings and check results.

    For each setting the transcript scores in ``tmap`` are overwritten, the
    POS_STRAND graph is pruned (all trimming parameters 0), the first
    pruned subgraph is assembled, and the resulting paths and scores are
    compared, position by position, with the expected ones.
    """
    # NOTE(review): a second definition named test_assembler1 appears later
    # in the visible source; if both live in the same class the later one
    # replaces this one -- confirm and drop one of them.

    # setup correct transcripts
    PATH_ABCDE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                  Exon(600, 700), Exon(800, 900))
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    G, tmap = get_transcript_graphs(transcripts)[POS_STRAND]

    def assemble(scores, kmax):
        # set transcript scores
        for name, score in scores:
            tmap[name].score = score
        # assemble; only the first pruned subgraph is used
        GS = list(prune_transcript_graph(G, POS_STRAND, tmap,
                                         min_trim_length=0,
                                         trim_utr_fraction=0,
                                         trim_intron_fraction=0))
        Gsub, strand, partial_paths = GS[0]
        return list(assemble_transcript_graph(Gsub, strand, partial_paths,
                                              user_kmax=kmax,
                                              ksensitivity=0,
                                              fraction_major_path=0,
                                              max_paths=1000))

    def check(results, expected):
        # assertEqual, not the deprecated assertEquals alias
        self.assertEqual(len(results), len(expected))
        for i, (path, score) in enumerate(expected):
            self.assertEqual(tuple(results[i].path), path)
            self.assertAlmostEqual(results[i].score, score, places=3)

    results = assemble((("ABCDE", 2.0), ("ACE", 1.0),
                        ("ABCE", 1.0), ("ACDE", 1.0)), kmax=2)
    check(results, ((PATH_ABCDE, 3.0), (PATH_ACE, 2.0)))

    results = assemble((("ABCDE", 4.0), ("ACE", 3.0),
                        ("ABCE", 2.0), ("ACDE", 1.0)), kmax=3)
    check(results, ((PATH_ABCDE, 4.0), (PATH_ACE, 3.0),
                    (PATH_ABCE, 2.0), (PATH_ACDE, 1.0)))
def test_trim_intron_retention(self):
    """Verify the node sets trim_graph yields for trim_intron_retention1.gtf.

    Runs trim_graph on the POS_STRAND graph with ``trim_utr_fraction`` 0.0
    and four increasing ``trim_intron_fraction`` values.
    """
    # NOTE(review): an earlier definition named test_trim_intron_retention
    # appears in the visible source; if both live in the same class only
    # this later one is kept -- confirm and drop one of them.
    transcripts = read_first_locus("trim_intron_retention1.gtf",
                                   score_attr="FPKM")
    GG = get_transcript_graphs(transcripts)
    G, _tmap = GG[POS_STRAND]
    first_node = {Exon(500, 1500)}
    both_nodes = {Exon(500, 1500), Exon(2000, 9000)}
    # trim at different thresholds, in this order
    thresholds = [0.01, 0.11, 0.21, 1.0]
    expected_sets = [set(), first_node, both_nodes, both_nodes]
    for fraction, correct in zip(thresholds, expected_sets):
        trim_nodes = trim_graph(G, POS_STRAND,
                                min_trim_length=0,
                                trim_utr_fraction=0.0,
                                trim_intron_fraction=fraction)
        # assertEqual instead of assertTrue(a == b); the message names
        # the threshold that failed
        self.assertEqual(trim_nodes, correct,
                         "trim_intron_fraction=%r" % (fraction,))
def test_trim_intron_bidir(self):
    """Check trim_graph on trim_intron_bidir1.gtf for both strands.

    Four ``trim_intron_fraction`` thresholds are applied (with
    ``trim_utr_fraction`` fixed at 0.0) to the POS_STRAND graph, and then
    again to the NEG_STRAND graph built after every transcript has been
    reassigned to NEG_STRAND. The expected node sets are the same for both.
    """
    transcripts = read_first_locus("trim_intron_bidir1.gtf",
                                   score_attr="FPKM")
    # (trim_intron_fraction, expected trimmed nodes), applied in this order
    cases = (
        (0.001, set()),
        (0.025, {Exon(1900, 2000), Exon(1000, 1100)}),
        (0.2, {Exon(1900, 2000), Exon(1100, 1200),
               Exon(1800, 1900), Exon(1000, 1100)}),
        (0.25, {Exon(1900, 2000), Exon(1100, 1200), Exon(1200, 1300),
                Exon(1700, 1800), Exon(1800, 1900), Exon(1000, 1100)}),
    )

    def check_strand(strand):
        # rebuild the graphs so they reflect the transcripts' current strand
        G, _tmap = get_transcript_graphs(transcripts)[strand]
        # trim at different thresholds
        for intron_fraction, expected in cases:
            trim_nodes = trim_graph(G, strand,
                                    min_trim_length=0,
                                    trim_utr_fraction=0.0,
                                    trim_intron_fraction=intron_fraction)
            # assertEqual instead of assertTrue(a == b); the message
            # names the case that failed inside the loop
            self.assertEqual(
                trim_nodes, expected,
                "trim_intron_fraction=%r, strand=%r"
                % (intron_fraction, strand))

    check_strand(POS_STRAND)
    # flip sign of transcripts and try again
    for t in transcripts:
        t.strand = NEG_STRAND
    check_strand(NEG_STRAND)
def test_assembler1(self):
    """Check assembled paths and scores for assemble1.gtf in two scenarios.

    Scenario 1 uses scores 2/1/1/1 with ``user_kmax=2`` and expects two
    results; scenario 2 uses scores 4/3/2/1 with ``user_kmax=3`` and
    expects four. In each scenario the POS_STRAND graph is pruned with all
    trimming parameters set to 0 and the first pruned subgraph is
    assembled.
    """
    # NOTE(review): an earlier definition named test_assembler1 appears in
    # the visible source; if both live in the same class only this later
    # one is kept -- confirm and drop one of them.

    # setup correct transcripts
    exon_a = Exon(0, 100)
    exon_b = Exon(200, 300)
    exon_c = Exon(400, 500)
    exon_d = Exon(600, 700)
    exon_e = Exon(800, 900)
    PATH_ABCDE = (exon_a, exon_b, exon_c, exon_d, exon_e)
    PATH_ACE = (exon_a, exon_c, exon_e)
    PATH_ABCE = (exon_a, exon_b, exon_c, exon_e)
    PATH_ACDE = (exon_a, exon_c, exon_d, exon_e)
    # read transcripts
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    GG = get_transcript_graphs(transcripts)
    G, tmap = GG[POS_STRAND]
    transcript_ids = ("ABCDE", "ACE", "ABCE", "ACDE")
    # (scores per transcript id, kmax, expected (path, score) per result)
    scenarios = (
        ((2.0, 1.0, 1.0, 1.0), 2,
         ((PATH_ABCDE, 3.0), (PATH_ACE, 2.0))),
        ((4.0, 3.0, 2.0, 1.0), 3,
         ((PATH_ABCDE, 4.0), (PATH_ACE, 3.0),
          (PATH_ABCE, 2.0), (PATH_ACDE, 1.0))),
    )
    for scores, kmax, expected in scenarios:
        # set transcript scores
        for t_id, score in zip(transcript_ids, scores):
            tmap[t_id].score = score
        # assemble; only the first pruned subgraph is used
        GS = list(prune_transcript_graph(G, POS_STRAND, tmap,
                                         min_trim_length=0,
                                         trim_utr_fraction=0,
                                         trim_intron_fraction=0))
        Gsub, strand, partial_paths = GS[0]
        results = list(assemble_transcript_graph(Gsub, strand,
                                                 partial_paths,
                                                 user_kmax=kmax,
                                                 ksensitivity=0,
                                                 fraction_major_path=0,
                                                 max_paths=1000))
        # assertEqual, not the deprecated assertEquals alias
        self.assertEqual(len(results), len(expected))
        for result, (path, score) in zip(results, expected):
            self.assertEqual(tuple(result.path), path)
            self.assertAlmostEqual(result.score, score, places=3)
def test_trim_intronic_utr(self):
    """Check trim_graph on trim_intron_utr1.gtf for both strands.

    Five ``trim_intron_fraction`` thresholds are applied with
    ``trim_utr_fraction`` fixed at 0.0. The POS_STRAND graph is checked
    first; then every transcript is reassigned to NEG_STRAND, the graphs
    are rebuilt, and the same expectations are checked on NEG_STRAND.
    """
    transcripts = read_first_locus("trim_intron_utr1.gtf",
                                   score_attr="FPKM")
    # (trim_intron_fraction, expected trimmed nodes), applied in this order
    cases = (
        (0.001, set()),
        (0.011, {Exon(1000, 1100)}),
        (0.055, {Exon(1000, 1100), Exon(1100, 1200)}),
        (0.15, {Exon(1000, 1100), Exon(1100, 1200), Exon(1200, 1300)}),
        (1.0, {Exon(1000, 1100), Exon(1100, 1200), Exon(1200, 1300),
               Exon(1300, 1500)}),
    )
    # the first pass uses the transcripts as read; the second flips them
    for strand, flip_first in ((POS_STRAND, False), (NEG_STRAND, True)):
        if flip_first:
            # flip sign of transcripts and try again
            for t in transcripts:
                t.strand = strand
        GG = get_transcript_graphs(transcripts)
        G, _tmap = GG[strand]
        # trim at different thresholds
        for intron_fraction, expected in cases:
            trim_nodes = trim_graph(G, strand,
                                    min_trim_length=0,
                                    trim_utr_fraction=0.0,
                                    trim_intron_fraction=intron_fraction)
            # assertEqual instead of assertTrue(a == b); the message
            # names the case that failed inside the loop
            self.assertEqual(
                trim_nodes, expected,
                "trim_intron_fraction=%r, strand=%r"
                % (intron_fraction, strand))