Example #1
0
def gene2exon_list (cursor, gene_id, db_name=None, verbose=False):

    exons = []

    if (db_name): 
        if not switch_to_db(cursor, db_name):
            return False

    qry  = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)

    if (not rows):
        rows = search_db(cursor, 'select database()')
        if verbose:
            print "database ", rows[0][0]
            rows = search_db(cursor, qry, verbose = True)
            print rows
        return []

    for row in rows:
        exon = Exon()
        if (not exon.load_from_gene2exon(row)):
            continue
        exons.append(exon)

    return exons
Example #2
0
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):

    exons = []

    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    qry = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)

    if not rows:
        rows = search_db(cursor, "select database()")
        if verbose:
            print "database ", rows[0][0]
            rows = search_db(cursor, qry, verbose=True)
            print rows
        return []

    for row in rows:
        exon = Exon()
        if not exon.load_from_gene2exon(row):
            continue
        exons.append(exon)

    return exons
Example #3
0
def get_predicted_exons(cursor, gene_id, species):

    exons = []

    # get the region on the gene
    ret = get_gene_region(cursor, gene_id)
    if ret:
        [gene_seq_id, gene_region_start, gene_region_end, gene_region_strand] = ret
    else:
        print "region not retrived for ", species, gene_id
        return []

    qry = "SELECT  * FROM  prediction_exon  WHERE seq_region_id = %d " % gene_seq_id
    qry += " AND  seq_region_start >= %d AND seq_region_start <= %d " % (gene_region_start, gene_region_end)
    qry += " AND  seq_region_end   >= %d AND seq_region_end   <= %d " % (gene_region_start, gene_region_end)
    rows = search_db(cursor, qry)

    if not rows:
        return []
    for row in rows:
        exon = Exon()
        exon.gene_id = gene_id
        exon.load_from_ensembl_prediction(gene_region_start, gene_region_end, row)
        exons.append(exon)

    return exons
Example #4
0
def get_predicted_exons (cursor, gene_id, species):

    exons = []

    # get the region on the gene
    ret = get_gene_region (cursor, gene_id)
    if  ret:
        [gene_seq_id, gene_region_start, gene_region_end, 
         gene_region_strand] = ret
    else:
        print "region not retrived for ", species, gene_id
        return []

    qry    = "SELECT  * FROM  prediction_exon  WHERE seq_region_id = %d "  %  gene_seq_id
    qry   += " AND  seq_region_start >= %d AND seq_region_start <= %d " %  \
        (gene_region_start, gene_region_end)
    qry   += " AND  seq_region_end   >= %d AND seq_region_end   <= %d " %  \
        (gene_region_start, gene_region_end)
    rows   = search_db (cursor, qry)

    if (not rows):
        return []
    for row in rows:
        exon         = Exon()
        exon.gene_id = gene_id
        exon.load_from_ensembl_prediction (gene_region_start, gene_region_end, row)
        exons.append(exon)
 
    return exons
Example #5
0
def get_exon (cursor, exon_id, is_known=None, db_name=None):
    """Fetch a single exon by id and return it as an Exon object.

    Arguments:
        cursor   -- database cursor handed to switch_to_db / search_db
        exon_id  -- integer exon identifier (formatted with %d)
        is_known -- selects where to look:
                      2    -> sw_exon table
                      3    -> usearch_exon table
                      else -> gene2exon table, additionally filtered on
                              is_known when it is a true value
        db_name  -- optional database to switch to first

    Returns:
        An Exon object. It is a freshly constructed, unloaded Exon() when
        the database switch fails or the query yields no rows.
    """
    exon = Exon ()

    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # (is_known code, query template, table name) for the novel-exon tables
    novel_sources = (
        (2, "select * from sw_exon where exon_id = %d", "sw_exon"),
        (3, "select * from usearch_exon where exon_id = %d", "usearch_exon"),
    )
    for code, template, table in novel_sources:
        if is_known == code:
            found = search_db(cursor, template % exon_id, verbose=False)
            if found:
                exon.load_from_novel_exon (found[0], table)
            return exon

    # everything else lives in gene2exon
    qry = "select * from gene2exon where exon_id = %d" % exon_id
    if is_known:
        qry += " and is_known = %s " % is_known
    found = search_db(cursor, qry, verbose=False)
    if found:
        exon.load_from_gene2exon (found[0])

    return exon
Example #6
0
def get_novel_exons (cursor, gene_id, table):
    """Return the exons stored for a gene in one of the novel-exon tables.

    Arguments:
        cursor  -- database cursor handed to search_db
        gene_id -- gene identifier; converted with int() before formatting
        table   -- name of the table to read; it is interpolated directly
                   into the query and also passed to
                   Exon.load_from_novel_exon

    Returns:
        A list of Exon objects, empty when the query yields no rows.
    """
    select_clause = "select * from %s " % table
    where_clause  = " where gene_id = %d " % int(gene_id)
    found = search_db (cursor, select_clause + where_clause)

    novel = []
    # an empty/false query result simply yields no iterations
    for row in found or []:
        exon = Exon()
        exon.load_from_novel_exon (row, table)
        novel.append(exon)
    return novel
Example #7
0
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Fetch a single exon by id and return it as an Exon object.

    Arguments:
        cursor   -- database cursor handed to switch_to_db / search_db
        exon_id  -- integer exon identifier (formatted with %d)
        is_known -- selects where to look:
                      2    -> sw_exon table
                      3    -> usearch_exon table
                      else -> gene2exon table, additionally filtered on
                              is_known when it is a true value
        db_name  -- optional database to switch to first

    Returns:
        An Exon object. It is a freshly constructed, unloaded Exon() when
        the database switch fails or the query yields no rows.
    """
    exon = Exon()

    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # (is_known code, query template, table name) for the novel-exon tables
    novel_sources = (
        (2, "select * from sw_exon where exon_id = %d", "sw_exon"),
        (3, "select * from usearch_exon where exon_id = %d", "usearch_exon"),
    )
    for code, template, table in novel_sources:
        if is_known == code:
            found = search_db(cursor, template % exon_id, verbose=False)
            if found:
                exon.load_from_novel_exon(found[0], table)
            return exon

    # everything else lives in gene2exon
    qry = "select * from gene2exon where exon_id = %d" % exon_id
    if is_known:
        qry += " and is_known = %s " % is_known
    found = search_db(cursor, qry, verbose=False)
    if found:
        exon.load_from_gene2exon(found[0])

    return exon
Example #8
0
def get_known_exons (cursor, gene_id, species):

    exons = []

    qry  = "select distinct exon_transcript.exon_id from  exon_transcript, transcript "
    qry += " where exon_transcript.transcript_id = transcript.transcript_id "
    qry += " and transcript.gene_id = %d " % gene_id

    rows = search_db (cursor, qry)
    
    if (not rows ):
        return []
    if ('Error' in rows[0]):
        search_db (cursor, qry, verbose = True)
        return []

    # get the region on the gene
    ret = get_gene_region (cursor, gene_id)
    if  ret:
        [gene_seq_id, gene_region_start, gene_region_end, 
         gene_region_strand] = ret
    else:
        print "region not retrived for ", species, gene_id
        return []

    exon_ids = []
    for row in rows:
        exon_ids.append(row[0])

    for exon_id in exon_ids:
        qry = "select * from exon where exon_id=%d" % exon_id
        rows = search_db (cursor, qry)
        if (not rows or 'Error' in rows[0]):
            search_db (cursor, qry, verbose = True)
            continue
        exon         = Exon()
        exon.gene_id = gene_id
        exon.load_from_ensembl_exon (gene_region_start, gene_region_end, rows[0])
        exons.append(exon)

    return exons
    def extract_regions_from_annotations(self, annotation_lines, region_name):
        ''' Build an ExonsList from the annotation lines of one region type.

            Each line is split on tabs; every line whose third column
            equals region_name is wrapped in an Exon (constructed from the
            list of columns) and added to the result. Lines with fewer
            than three columns (e.g. blank or comment lines) are skipped.

            Progress is reported through self.print_progres after every
            line, and a newline is written to stderr once all lines have
            been processed.

            annotation_lines -- sequence of tab-separated annotation lines;
                                len() is taken of it while iterating
            region_name      -- value to match against the third column
            returns          -- the populated ExonsList
        '''
        exons = ExonsList()

        for idx, line in enumerate(annotation_lines):
            fields = line.split('\t')
            # short lines have no third column; indexing it would raise
            if len(fields) > 2 and fields[2] == region_name:
                exons.add_exon(Exon(fields))

            self.print_progres(idx, len(annotation_lines))

        sys.stderr.write("\n")
        return exons
Example #10
0
 def add_exon(self, chr_map, sequence=''):
     """Create an Exon owned by this object and append it to self.exons.

     The Exon is constructed as Exon(self, chr_map, sequence).
     Nothing is returned.
     """
     new_exon = Exon(self, chr_map, sequence)
     self.exons.append(new_exon)
Example #11
0
def map2exon(cursor, ensembl_db_name, map, paralogue=False):
    """Build a placeholder Exon describing the second exon of a map entry.

    The returned Exon carries only exon_id, is_known, analysis_id and
    (in some branches) exon_seq_id; per the original comment it is "fake"
    exon info meant to be passed on to get_exon_pepseq.

    Arguments:
        cursor          -- database cursor handed to switch_to_db / search_db
        ensembl_db_name -- mapping indexed by map.species_2 to get the
                           database name to switch to
        map             -- object providing exon_id_2, exon_known_2, source
                           and species_2 (the name shadows the builtin but
                           is part of the signature)
        paralogue       -- when false, switch to the other species'
                           database; when true, look up exon_seq_id in the
                           current database

    Returns:
        The Exon. analysis_id is -1 for source 'sw_sharp', -2 for
        'usearch' and 1 otherwise. exon_seq_id is -1 when the database
        switch fails or the lookup finds nothing.
    """
    # (map.source value, analysis_id, exon_seq_id lookup query)
    novel_sources = (
        ('sw_sharp', -1, "select exon_seq_id from sw_exon where exon_id = %d "),
        ('usearch',  -2, "select exon_seq_id from usearch_exon where exon_id = %d "),
    )

    exon = Exon ()
    exon.exon_id  = map.exon_id_2
    exon.is_known = map.exon_known_2

    for source, analysis_id, template in novel_sources:
        if map.source == source:
            exon.analysis_id = analysis_id

            if not paralogue:
                # move to the other species
                if not switch_to_db (cursor, ensembl_db_name[map.species_2]):
                    exon.exon_seq_id = -1
                # NOTE(review): after a successful switch exon_seq_id is
                # left unset -- confirm this is intended
                return exon

            found = search_db (cursor, template % exon.exon_id)
            if found and found[0][0]:
                exon.exon_seq_id = int(found[0][0])
            else:
                exon.exon_seq_id = -1
            return exon

    # any other source
    exon.analysis_id = 1
    return exon