""" Reconstruct transcripts based on Splice Junctions """ #assign all splice junctions to specific gene: go through cruzdb & find end points for each gene --> assign # g = Genome( 'sqlite:////tmp/hg19.db' ) g = Genome('sqlite:////tmp/hg19_v2.db') Isoform.set_cruzdb(g) #retrieve gene & print information on it based on gene = g.refGene.filter_by(name2='BRAF').all() # all_genes = g.refGene.filter_by( name2 = 'TTN' ).first() # all_genes = g.refGene.filter_by( name2 = 'AGRN' ).all() # all_genes = g.refGene.filter_by( name2 = 'AGRN' ).first() # all_genes = g.refGene.filter_by( name2 = 'DIXDC1' ).all() for each_isoform in gene: obj_iso = Isoform(each_isoform.name) #print name print obj_iso.isoform_id, ":", obj_iso.gene_sym print "obj_iso = ", obj_iso # print "exons: " # for i, (k,v) in enumerate( obj_iso.hashExonList.iteritems() ): #k = feature name, v = Exon object # print i, ": ", k, " -> ", v # print "introns: " # for i, (k,v) in enumerate( obj_iso.hashIntronList.iteritems() ): #k = feature name, v = Exon object # print i, ": ", k, " -> ", v print "exons: "
tuple_exons = obj_sj.spliced_elems(isoform_id, False) print "exons ligated by SJ = ", tuple_exons hash_sj_exon_info = obj_sj.spliced_elems_position_range( isoform_id, False, True) print "show info about exons ligated by SJ: " print "range of exons = ", hash_sj_exon_info['str_sj_pos'] print "prev_elem = ", hash_sj_exon_info['prev_elem'] print "next_elem = ", hash_sj_exon_info['next_elem'] print "prev_exon_canon = ", hash_sj_exon_info['prev_exon_canon'] print "next_exon_canon = ", hash_sj_exon_info['next_exon_canon'] ##TEST:: # isoform_id = 'NR_024540' hash_pos = {'chrom': 'chr1', 'pos_oi': 14829} obj_iso = Isoform(isoform_id, hash_pos) print "UCSC gene = ", obj_iso.get_ucsc_gene_name(isoform_id) print "Ensembl transcript ID = ", obj_iso.get_ensembl_gene_name(isoform_id) ensembl_isoforms = obj_iso.get_ensembl_isoforms(isoform_id) print "Ensembl gene ID = ", ensembl_isoforms for i, each_isoform in enumerate(ensembl_isoforms): print "Ensembl gene ", i, " | gene name (ENSG) = ", each_isoform.name2, " | isoform (ENST) = ", each_isoform.name, " | start = ", each_isoform.start, " | end = ", each_isoform.end #test MultiIsoform & Isoform - make sure the correct isoform # obj_mi = MultiIsoform( chrom, start, end ) # print "sj_pos = ", sj_pos # for k,v in obj_mi.hash_isoforms.iteritems(): #k = isoform ID, v = Isoform instance # print "isoform = ", k # for k2,v2 in v.hashExonList.iteritems():