def test_assembler1(self):
    """Assemble the locus in assemble1.gtf with kmax=2 and then kmax=3.

    The locus holds four transcripts (ABCDE, ACE, ABCE, ACDE) built from
    the exons A..E.  With kmax=2 the test expects two assembled paths
    (ABCDE scoring 3.0 and ACE scoring 2.0); with kmax=3 it expects all
    four input isoforms back, each carrying the score assigned to it.
    """
    # expected exon chains for each isoform
    PATH_ABCDE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                  Exon(600, 700), Exon(800, 900))
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts and build the positive-strand graph
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    GG = get_transcript_graphs(transcripts)
    G, tmap = GG[POS_STRAND]

    def assemble(kmax):
        # prune with all trimming disabled, then assemble the first
        # subgraph using the scores currently stored in tmap
        GS = list(prune_transcript_graph(G, POS_STRAND, tmap,
                                         min_trim_length=0,
                                         trim_utr_fraction=0,
                                         trim_intron_fraction=0))
        Gsub, strand, partial_paths = GS[0]
        return list(assemble_transcript_graph(Gsub, strand, partial_paths,
                                              user_kmax=kmax,
                                              ksensitivity=0,
                                              fraction_major_path=0,
                                              max_paths=1000))

    # round 1: kmax=2
    tmap["ABCDE"].score = 2.0
    tmap["ACE"].score = 1.0
    tmap["ABCE"].score = 1.0
    tmap["ACDE"].score = 1.0
    results = assemble(2)
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # round 2: kmax=3 with distinct scores per isoform
    tmap["ABCDE"].score = 4.0
    tmap["ACE"].score = 3.0
    tmap["ABCE"].score = 2.0
    tmap["ACDE"].score = 1.0
    results = assemble(3)
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)
def assemble_gene(locus_chrom, locus_id_str, gene_id_value_obj,
                  tss_id_value_obj, t_id_value_obj, G, strand,
                  partial_paths, config, gtf_fileh, bed_fileh):
    """Assemble transcripts from one graph and write them as GTF and/or BED.

    Parameters:
      locus_chrom: chromosome name forwarded to the GTF/BED writers.
      locus_id_str: value written as the ``locus_id`` of every GTF feature.
      gene_id_value_obj, tss_id_value_obj: id sources handed to
        ``annotate_gene_and_tss_ids``.
      t_id_value_obj: object whose ``next()`` method yields the integer id
        for each transcript (rendered as ``TU<id>``).
      G, strand, partial_paths: forwarded to ``assemble_transcript_graph``.
      config: provides ``kmax``, ``ksensitivity``,
        ``fraction_major_isoform``, ``max_paths``, ``create_gtf`` and
        ``create_bed``.
      gtf_fileh, bed_fileh: writable file handles; each is only written to
        when the matching ``config.create_*`` flag is true.

    Returns None; all results are emitted through the file handles.
    """
    # run assembly algorithm
    path_info_list = assemble_transcript_graph(
        G, strand, partial_paths,
        config.kmax, config.ksensitivity,
        config.fraction_major_isoform, config.max_paths)
    # pass the count as a logging argument so formatting is skipped when
    # debug output is disabled
    logging.debug("\tAssembled %d transcript(s)", len(path_info_list))
    # determine gene ids and tss ids
    annotate_gene_and_tss_ids(path_info_list, strand,
                              gene_id_value_obj, tss_id_value_obj)
    # bin transcripts by gene id
    gene_path_info_dict = collections.defaultdict(list)
    for p in path_info_list:
        gene_path_info_dict[p.gene_id].append(p)
    # .values() instead of .itervalues() so the loop also runs on Python 3
    for gene_path_info_list in gene_path_info_dict.values():
        # highest scoring path is expected to be first in the list; the
        # 1e-8 floor prevents a division by zero below
        # NOTE(review): ordering is assumed, not enforced here -- confirm
        # against assemble_transcript_graph
        highest_score = max(1e-8, gene_path_info_list[0].score)
        # create output records for each transcript path
        for p in gene_path_info_list:
            # assign transcript id
            t_id = t_id_value_obj.next()
            # get strings for each id
            t_id_str = "TU%d" % t_id
            tss_id_str = "TSS%d" % (p.tss_id)
            gene_id_str = "G%d" % (p.gene_id)
            # isoform fraction relative to the gene's first path
            frac = p.score / highest_score
            # write to GTF
            if config.create_gtf:
                for f in get_gtf_features(locus_chrom, strand, p.path,
                                          locus_id=locus_id_str,
                                          gene_id=gene_id_str,
                                          tss_id=tss_id_str,
                                          transcript_id=t_id_str,
                                          score=p.score,
                                          frac=frac):
                    # write() replaces the Python 2-only "print >>" form
                    gtf_fileh.write(str(f) + "\n")
            # write to BED; the score column is the fraction scaled by 1000
            if config.create_bed:
                name = "%s|%s(%.1f)" % (gene_id_str, t_id_str, p.score)
                fields = write_bed(locus_chrom, name, strand,
                                   int(round(1000.0 * frac)), p.path)
                bed_fileh.write("\t".join(fields) + "\n")
def assemble_gene(locus_chrom, locus_id_str, gene_id_value_obj,
                  tss_id_value_obj, t_id_value_obj, G, strand,
                  partial_paths, config, gtf_fileh, bed_fileh):
    """Run the assembler on one graph and emit its transcripts.

    Parameters:
      locus_chrom: chromosome name passed through to the GTF/BED writers.
      locus_id_str: written as the ``locus_id`` of each GTF feature.
      gene_id_value_obj, tss_id_value_obj: id sources given to
        ``annotate_gene_and_tss_ids``.
      t_id_value_obj: object with a ``next()`` method returning the next
        integer transcript id (formatted as ``TU<id>``).
      G, strand, partial_paths: inputs to ``assemble_transcript_graph``.
      config: read for ``kmax``, ``ksensitivity``,
        ``fraction_major_isoform``, ``max_paths``, ``create_gtf`` and
        ``create_bed``.
      gtf_fileh, bed_fileh: writable handles, used only when the
        corresponding ``config.create_gtf`` / ``config.create_bed`` is set.

    Returns None; output goes to the supplied file handles.
    """
    # run assembly algorithm
    path_info_list = assemble_transcript_graph(G, strand, partial_paths,
                                               config.kmax,
                                               config.ksensitivity,
                                               config.fraction_major_isoform,
                                               config.max_paths)
    # lazy logging arguments: the message is only built if DEBUG is on
    logging.debug("\tAssembled %d transcript(s)", len(path_info_list))
    # determine gene ids and tss ids
    annotate_gene_and_tss_ids(path_info_list, strand,
                              gene_id_value_obj, tss_id_value_obj)
    # bin transcripts by gene id
    gene_path_info_dict = collections.defaultdict(list)
    for p in path_info_list:
        gene_path_info_dict[p.gene_id].append(p)
    # .values() works on both Python 2 and 3 (itervalues is Python 2 only)
    for gene_path_info_list in gene_path_info_dict.values():
        # the first path is taken as the highest scoring one; flooring at
        # 1e-8 avoids dividing by zero when that score is 0
        # NOTE(review): relies on the assembler's output order -- confirm
        highest_score = max(1e-8, gene_path_info_list[0].score)
        for p in gene_path_info_list:
            # assign transcript id
            t_id = t_id_value_obj.next()
            # get strings for each id
            t_id_str = "TU%d" % t_id
            tss_id_str = "TSS%d" % (p.tss_id)
            gene_id_str = "G%d" % (p.gene_id)
            # compute isoform fraction relative to the first path
            frac = p.score / highest_score
            # write to GTF
            if config.create_gtf:
                features = get_gtf_features(locus_chrom, strand, p.path,
                                            locus_id=locus_id_str,
                                            gene_id=gene_id_str,
                                            tss_id=tss_id_str,
                                            transcript_id=t_id_str,
                                            score=p.score,
                                            frac=frac)
                for f in features:
                    # explicit write() instead of Python 2's "print >>"
                    gtf_fileh.write(str(f) + "\n")
            # write to BED, scaling the fraction by 1000 for the score column
            if config.create_bed:
                name = "%s|%s(%.1f)" % (gene_id_str, t_id_str, p.score)
                fields = write_bed(locus_chrom, name, strand,
                                   int(round(1000.0 * frac)), p.path)
                bed_fileh.write('\t'.join(fields) + "\n")
def test_assembler1(self):
    """Check assembly of the assemble1.gtf locus at kmax=2 and kmax=3.

    Four transcripts (ABCDE, ACE, ABCE, ACDE) share exons A..E.  The
    kmax=2 round must yield two paths (ABCDE at 3.0, ACE at 2.0); the
    kmax=3 round must yield all four isoforms with the scores that were
    assigned to them.
    """
    # expected exon chains for each isoform
    PATH_ABCDE = (
        Exon(0, 100),
        Exon(200, 300),
        Exon(400, 500),
        Exon(600, 700),
        Exon(800, 900),
    )
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts and pick the positive-strand graph
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    GG = get_transcript_graphs(transcripts)
    G, tmap = GG[POS_STRAND]

    def run_assembly(kmax):
        # prune without any trimming, then assemble the first subgraph
        # with whatever scores tmap holds at call time
        GS = list(
            prune_transcript_graph(G,
                                   POS_STRAND,
                                   tmap,
                                   min_trim_length=0,
                                   trim_utr_fraction=0,
                                   trim_intron_fraction=0))
        Gsub, strand, partial_paths = GS[0]
        return list(
            assemble_transcript_graph(Gsub,
                                      strand,
                                      partial_paths,
                                      user_kmax=kmax,
                                      ksensitivity=0,
                                      fraction_major_path=0,
                                      max_paths=1000))

    # set transcript scores and assemble with kmax=2
    tmap["ABCDE"].score = 2.0
    tmap["ACE"].score = 1.0
    tmap["ABCE"].score = 1.0
    tmap["ACDE"].score = 1.0
    results = run_assembly(2)
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # change transcript scores and assemble with kmax=3
    tmap["ABCDE"].score = 4.0
    tmap["ACE"].score = 3.0
    tmap["ABCE"].score = 2.0
    tmap["ACDE"].score = 1.0
    results = run_assembly(3)
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)
def test_assembler1(self):
    """Assemble the locus in assemble1.gtf with kmax=2 and then kmax=3.

    Scores are set directly on the transcripts (looked up by their
    ``transcript_id`` attribute) and the graphs are rebuilt with
    ``create_transcript_graphs`` before each assembly.  With kmax=2 the
    test expects two paths (ABCDE at 3.0, ACE at 2.0); with kmax=3 it
    expects all four isoforms with the scores assigned to them.
    """
    # expected exon chains for each isoform
    PATH_ABCDE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                  Exon(600, 700), Exon(800, 900))
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts and index them by transcript id
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    tdict = dict((t.attrs['transcript_id'], t) for t in transcripts)

    def assemble(kmax):
        # rebuild the graphs from the transcripts' current scores (all
        # trimming disabled) and assemble the first one
        GS = create_transcript_graphs('chr1', transcripts,
                                      create_bedgraph=False,
                                      bedgraph_filehs=None,
                                      min_trim_length=0,
                                      trim_utr_fraction=0.0,
                                      trim_intron_fraction=0.0)
        first = GS[0]
        return list(assemble_transcript_graph(first.Gsub, first.strand,
                                              first.partial_paths,
                                              user_kmax=kmax,
                                              ksensitivity=0,
                                              fraction_major_path=0,
                                              max_paths=1000))

    # set transcript scores and assemble with kmax=2
    tdict["ABCDE"].score = 2.0
    tdict["ACE"].score = 1.0
    tdict["ABCE"].score = 1.0
    tdict["ACDE"].score = 1.0
    results = assemble(2)
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # change transcript scores and assemble with kmax=3
    tdict["ABCDE"].score = 4.0
    tdict["ACE"].score = 3.0
    tdict["ABCE"].score = 2.0
    tdict["ACDE"].score = 1.0
    results = assemble(3)
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)
def test_assembler1(self):
    """Check assembly of the assemble1.gtf locus at kmax=2 and kmax=3.

    Transcript scores are assigned through a ``transcript_id`` lookup and
    the graphs are recreated via ``create_transcript_graphs`` before each
    round.  The kmax=2 round must yield two paths (ABCDE at 3.0, ACE at
    2.0); the kmax=3 round must yield all four isoforms with the scores
    assigned to them.
    """
    # expected exon chains for each isoform
    PATH_ABCDE = (
        Exon(0, 100),
        Exon(200, 300),
        Exon(400, 500),
        Exon(600, 700),
        Exon(800, 900),
    )
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts and index them by transcript id
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    tdict = dict((t.attrs['transcript_id'], t) for t in transcripts)

    def run_assembly(kmax):
        # recreate the graphs (no trimming) so they reflect the scores
        # currently set on the transcripts, then assemble the first one
        GS = create_transcript_graphs('chr1',
                                      transcripts,
                                      create_bedgraph=False,
                                      bedgraph_filehs=None,
                                      min_trim_length=0,
                                      trim_utr_fraction=0.0,
                                      trim_intron_fraction=0.0)
        graph = GS[0]
        return list(
            assemble_transcript_graph(graph.Gsub,
                                      graph.strand,
                                      graph.partial_paths,
                                      user_kmax=kmax,
                                      ksensitivity=0,
                                      fraction_major_path=0,
                                      max_paths=1000))

    # round 1: kmax=2
    tdict["ABCDE"].score = 2.0
    tdict["ACE"].score = 1.0
    tdict["ABCE"].score = 1.0
    tdict["ACDE"].score = 1.0
    results = run_assembly(2)
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # round 2: kmax=3 with a distinct score per isoform
    tdict["ABCDE"].score = 4.0
    tdict["ACE"].score = 3.0
    tdict["ABCE"].score = 2.0
    tdict["ACDE"].score = 1.0
    results = run_assembly(3)
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)