Example #1
0
""" Reconstruct transcripts based on Splice Junctions """

# Goal: assign every splice junction to a specific gene by looking up each
# gene's end points through cruzdb.
# Previously used database: 'sqlite:////tmp/hg19.db'
g = Genome('sqlite:////tmp/hg19_v2.db')
Isoform.set_cruzdb(g)

# Look up the refGene entries for the gene symbol of interest; information
# about each entry is printed further down.
# Other lookups tried while testing: 'TTN' (.first()), 'AGRN' (.all() and
# .first()), 'DIXDC1' (.all()).
braf_query = g.refGene.filter_by(name2='BRAF')
gene = braf_query.all()

# Build an Isoform for every refGene match in `gene` and print a short
# summary of it.
#
# Each print call takes one pre-formatted string, so the code is valid on
# both Python 2 and Python 3 and emits exactly what the former print
# statements did (including the doubled space produced by a label ending in
# a space followed by the print statement's own separator).
for each_isoform in gene:
    obj_iso = Isoform(each_isoform.name)

    # isoform ID and gene symbol
    print("%s : %s" % (obj_iso.isoform_id, obj_iso.gene_sym))

    # One-element tuple: the object is always formatted as a single value,
    # whatever its type.
    print("obj_iso =  %s" % (obj_iso,))

    # Debug aid (disabled): obj_iso.hashExonList and obj_iso.hashIntronList
    # can be enumerated to dump every feature; per the original notes the
    # keys are feature names and the values are Exon objects.
    #     for i, (k, v) in enumerate(obj_iso.hashExonList.iteritems()):
    #         print("%s :  %s  ->  %s" % (i, k, v))
    #     for i, (k, v) in enumerate(obj_iso.hashIntronList.iteritems()):
    #         print("%s :  %s  ->  %s" % (i, k, v))

    print("exons: ")
Example #2
0
# Report which elements the splice junction ligates for this isoform.
# `obj_sj` and `isoform_id` must already be defined before this point.
#
# Each print call takes one pre-formatted string so the code is valid on
# both Python 2 and Python 3 with identical output; values are wrapped in a
# one-element tuple so that a tuple value is formatted as a single item.
tuple_exons = obj_sj.spliced_elems(isoform_id, False)
print("exons ligated by SJ =  %s" % (tuple_exons,))

hash_sj_exon_info = obj_sj.spliced_elems_position_range(
    isoform_id, False, True)
print("show info about exons ligated by SJ: ")
print("range of exons =  %s" % (hash_sj_exon_info['str_sj_pos'],))
print("prev_elem =  %s" % (hash_sj_exon_info['prev_elem'],))
print("next_elem =  %s" % (hash_sj_exon_info['next_elem'],))
print("prev_exon_canon =  %s" % (hash_sj_exon_info['prev_exon_canon'],))
print("next_exon_canon =  %s" % (hash_sj_exon_info['next_exon_canon'],))

##TEST:: look up the UCSC / Ensembl names that correspond to an isoform.
# `isoform_id` must already be defined before this point.
# isoform_id = 'NR_024540'
hash_pos = {'chrom': 'chr1', 'pos_oi': 14829}
obj_iso = Isoform(isoform_id, hash_pos)

# Each print call takes one pre-formatted string so the code is valid on
# both Python 2 and Python 3; the text (including the doubled spaces the old
# print statements produced) is unchanged.
print("UCSC gene =  %s" % (obj_iso.get_ucsc_gene_name(isoform_id),))
# NOTE(review): the label says "transcript ID" but the value comes from
# get_ensembl_gene_name() -- confirm which one is intended.
print("Ensembl transcript ID =  %s"
      % (obj_iso.get_ensembl_gene_name(isoform_id),))
ensembl_isoforms = obj_iso.get_ensembl_isoforms(isoform_id)
# NOTE(review): the label says "gene ID" but the value is the whole result of
# get_ensembl_isoforms(), which is iterated entry by entry below -- confirm.
print("Ensembl gene ID =  %s" % (ensembl_isoforms,))

# One line per returned entry: name2 is labelled as the gene (ENSG) and name
# as the isoform (ENST), followed by the entry's start and end.
for i, each_isoform in enumerate(ensembl_isoforms):
    print(
        "Ensembl gene  %s  | gene name (ENSG) =  %s  | isoform (ENST) =  %s"
        "  | start =  %s  | end =  %s"
        % (i, each_isoform.name2, each_isoform.name,
           each_isoform.start, each_isoform.end))

#test MultiIsoform & Isoform - make sure the correct isoform
# obj_mi = MultiIsoform( chrom, start, end )
# print "sj_pos = ", sj_pos
# for k,v in obj_mi.hash_isoforms.iteritems():        #k = isoform ID, v = Isoform instance
#     print "isoform = ", k
#     for k2,v2 in v.hashExonList.iteritems():