Example #1
0
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Fetch one exon by its id and return it as an Exon object.

    The table that is queried depends on is_known:
      2     -> sw_exon       (row loaded with load_from_novel_exon)
      3     -> usearch_exon  (row loaded with load_from_novel_exon)
      other -> gene2exon     (row loaded with load_from_gene2exon); a truthy
               is_known is appended to the query as an extra condition.

    If db_name is given, the cursor is first switched to that database.
    When the switch fails or the query yields no rows, the freshly
    constructed (not loaded) Exon is returned.
    """
    exon = Exon()

    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # exons flagged 2 (sw_exon) or 3 (usearch_exon) are kept in their own tables
    if is_known == 2:
        novel_table = "sw_exon"
    elif is_known == 3:
        novel_table = "usearch_exon"
    else:
        novel_table = None

    if novel_table:
        qry = "select * from " + novel_table + " where exon_id = %d" % exon_id
        rows = search_db(cursor, qry, verbose=False)
        if rows:
            exon.load_from_novel_exon(rows[0], novel_table)
        return exon

    qry = "select * from gene2exon where exon_id = %d" % exon_id
    # NOTE(review): a falsy is_known (None, but also 0) adds no condition
    # here -- confirm that is_known=0 is not expected to filter.
    if is_known:
        qry += " and is_known = %s " % is_known
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        exon.load_from_gene2exon(rows[0])
    return exon
Example #2
0
def taxid2parentid(cursor, tax_id):
    """Return the parent_tax_id stored in the nodes table for tax_id.

    The value is the first column of the first returned row.  When the
    query yields nothing, it is repeated with verbose=True and "" is
    returned.
    """
    qry = "select parent_tax_id from nodes where tax_id= %d " % tax_id
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]

    # nothing came back: repeat the query verbosely before giving up
    search_db(cursor, qry, verbose=True)
    return ""
Example #3
0
def get_species (cursor):

    ensembl_db_name = {}
    all_species     = []

    # find the release number
    qry  = "select value from exolocator_config.parameter where name = 'ensembl_release_number'"
    rows = search_db(cursor, qry)
    if not rows or 'error' in rows[0][0].lower():
        print 'ensembl_release_number not set in exolocator_config'
        exit(1)
    release_number = rows[0][0]

    
    qry  = "show databases like '%core_{0}%'".format(release_number)
    rows = search_db(cursor, qry)
    if (not rows):
        print "No databases with 'core_{0}' in the name found".format(release_number)
        exit(1)

    for row in rows:
        db_name    = row[0]
        name_token = db_name.split ('_')
        species = name_token[0]
        i = 1
        while not name_token[i] == 'core':
            species += "_"+ name_token[i]
            i       += 1
        ensembl_db_name[species] = db_name
        all_species.append(species)

    return all_species, ensembl_db_name
Example #4
0
def get_alt_seq_info(cursor, gene_id, species):
    """Describe the PATCH_* assembly exception covering a gene's seq region.

    Returns [seq_name, file_names, seq_region_start, seq_region_end,
    seq_region_strand, mitochondrial], where name/file_name come from the
    seq_region row of the exception region, start/end come from the
    assembly_exception row, the strand comes from the gene row and
    mitochondrial is the result of is_mitochondrial(cursor, gene_id).
    Returns [] as soon as one of the three lookups yields no rows.

    The species argument is not used by this function.
    """
    # seq identifier from the gene table (the gene's own start/end are not used)
    gene_qry = ("select seq_region_id, seq_region_start, seq_region_end,  seq_region_strand "
                "from gene where gene_id = %d" % gene_id)
    gene_rows = search_db(cursor, gene_qry)
    if not gene_rows:
        return []
    seq_region_id, _gene_start, _gene_end, seq_region_strand = gene_rows[0]

    # Check whether we have an "assembly exception".  We do not want 'PAR'
    # regions, though:
    #   The pseudo-autosomal regions are homologous DNA sequences on the
    #   (human) X and Y chromosomes.  They allow the pairing and crossing-over
    #   of these sex chromosomes the same way the autosomal chromosomes do
    #   during meiosis.  As these genomic regions are identical between X and
    #   Y, they are oftentimes only stored once.
    # The exception types we are interested in are PATCH_FIX and PATCH_NOVEL.
    exc_qry = (
        "select  seq_region_start,  seq_region_end, exc_seq_region_id, exc_seq_region_start,  exc_seq_region_end "
        + "from assembly_exception where seq_region_id = %d " % seq_region_id
        + "and  assembly_exception.exc_type  like 'PATCH_%'"
    )
    exc_rows = search_db(cursor, exc_qry)
    if not exc_rows:
        return []
    seq_region_start, seq_region_end, exc_seq_region_id, _exc_start, _exc_end = exc_rows[0]

    region_qry = "select name, file_name from seq_region where seq_region_id= %d" % exc_seq_region_id
    region_rows = search_db(cursor, region_qry)
    if not region_rows:
        return []
    seq_name, file_names = region_rows[0]

    mitochondrial = is_mitochondrial(cursor, gene_id)

    return [seq_name, file_names, seq_region_start, seq_region_end, seq_region_strand, mitochondrial]
Example #5
0
def get_exon_pepseq(cursor, exon, db_name=None, verbose=False):

    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    if exon.exon_seq_id:
        exon_seq_id = exon.exon_seq_id
        qry = "select protein_seq "
        qry += " from exon_seq where exon_seq_id = %d" % exon_seq_id
    else:
        exon_id = exon.exon_id
        is_known = exon.is_known
        qry = "select protein_seq  "
        qry += " from exon_seq where exon_id = %d and is_known = %d" % (exon_id, is_known)

    rows = search_db(cursor, qry)

    if not rows:
        if verbose:
            rows = search_db(cursor, qry, verbose=True)
            print rows
        return ""

    protein_seq = rows[0][0]
    if protein_seq is None:
        protein_seq = ""

    return protein_seq
Example #6
0
def get_exon_pepseq (cursor, exon, db_name=None, verbose=False):

    if (db_name):
        if not switch_to_db(cursor, db_name):
            return False

    if exon.exon_seq_id:
        exon_seq_id = exon.exon_seq_id
        qry  = "select protein_seq "
        qry += " from exon_seq where exon_seq_id = %d" % exon_seq_id
    else:
        exon_id  = exon.exon_id
        is_known = exon.is_known
        qry  = "select protein_seq  "
        qry += " from exon_seq where exon_id = %d and is_known = %d" % (exon_id, is_known)

        
    rows = search_db(cursor, qry)


    if (not rows):
        if verbose:
            rows = search_db(cursor, qry, verbose = True)
            print rows
        return ""


    protein_seq = rows[0][0]
    if (protein_seq is None):
        protein_seq = ""
  
    return protein_seq
Example #7
0
def is_reference(cursor, gene_id, non_ref_id, db_name=None):

    if db_name:
        qry = "use %s " % db_name
        rows = search_db(cursor, qry)
        if rows:
            rows = search_db(cursor, qry, verbose=True)
            print rows
            exit(1)
    qry = "select seq_region_id from gene where gene_id=%d" % int(gene_id)
    rows = search_db(cursor, qry)
    if not rows:
        return True

    seq_region_id = rows[0]

    qry = "select attrib_type_id from  seq_region_attrib where seq_region_id=%s" % seq_region_id
    rows = search_db(cursor, qry)
    if not rows:
        return True

    for row in rows:
        attrib_type_id = int(row[0])
        if attrib_type_id == non_ref_id:
            return False

    return True
Example #8
0
def get_species(cursor):

    ensembl_db_name = {}
    all_species = []

    # find the release number
    qry = "select value from exolocator_config.parameter where name = 'ensembl_release_number'"
    rows = search_db(cursor, qry)
    if not rows or "error" in rows[0][0].lower():
        print "ensembl_release_number not set in exolocator_config"
        exit(1)
    release_number = rows[0][0]

    qry = "show databases like '%core_{0}%'".format(release_number)
    rows = search_db(cursor, qry)
    if not rows:
        print "No databases with 'core_{0}' in the name found".format(release_number)
        exit(1)

    for row in rows:
        db_name = row[0]
        name_token = db_name.split("_")
        species = name_token[0]
        i = 1
        while not name_token[i] == "core":
            species += "_" + name_token[i]
            i += 1
        ensembl_db_name[species] = db_name
        all_species.append(species)

    return all_species, ensembl_db_name
Example #9
0
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):

    exons = []

    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    qry = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)

    if not rows:
        rows = search_db(cursor, "select database()")
        if verbose:
            print "database ", rows[0][0]
            rows = search_db(cursor, qry, verbose=True)
            print rows
        return []

    for row in rows:
        exon = Exon()
        if not exon.load_from_gene2exon(row):
            continue
        exons.append(exon)

    return exons
Example #10
0
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Load the exon with the given id into a new Exon and return it.

    is_known selects the source table: 2 reads sw_exon and 3 reads
    usearch_exon (both via Exon.load_from_novel_exon); any other value reads
    gene2exon (via Exon.load_from_gene2exon), with a truthy is_known added
    to the query as an additional condition.

    db_name, if given, is switched to first.  A failed switch or an empty
    query result returns the Exon as constructed, without loading anything.
    """
    exon = Exon()

    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # pick the dedicated table for sw# (2) and usearch (3) exons, if any
    table = None
    if is_known == 2:
        table = "sw_exon"
    elif is_known == 3:
        table = "usearch_exon"

    if table is None:
        qry = "select * from gene2exon where exon_id = %d" % exon_id
        # NOTE(review): is_known=0 is falsy and therefore adds no condition
        # -- confirm this is intended.
        if is_known:
            qry += " and is_known = %s " % is_known
        rows = search_db(cursor, qry, verbose=False)
        if rows:
            exon.load_from_gene2exon(rows[0])
        return exon

    qry = "select * from " + table + " where exon_id = %d" % exon_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        exon.load_from_novel_exon(rows[0], table)
    return exon
Example #11
0
def get_ncbi_tax_name(cursor):
    """Return the name of the first database matching 'ncbi%tax%'.

    If no database matches, the query is repeated with verbose=True and ""
    is returned.
    """
    qry = "show databases like 'ncbi%tax%'"
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #12
0
def get_ncbi_tax_name(cursor):
    """Find the database whose name matches the pattern 'ncbi%tax%'.

    Returns the first column of the first row of "show databases", or ""
    (after re-running the query with verbose=True) when nothing matches.
    """
    qry = "show databases like 'ncbi%tax%'"
    found = search_db(cursor, qry)
    if not found:
        # repeat verbosely before reporting the failure
        search_db(cursor, qry, verbose=True)
        return ""
    return found[0][0]
Example #13
0
def gene2exon_list (cursor, gene_id, db_name=None, verbose=False):

    exons = []

    if (db_name): 
        if not switch_to_db(cursor, db_name):
            return False

    qry  = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)

    if (not rows):
        rows = search_db(cursor, 'select database()')
        if verbose:
            print "database ", rows[0][0]
            rows = search_db(cursor, qry, verbose = True)
            print rows
        return []

    for row in rows:
        exon = Exon()
        if (not exon.load_from_gene2exon(row)):
            continue
        exons.append(exon)

    return exons
Example #14
0
def get_canonical_coordinates(cursor, canonical_transcript_id):
    """Return the translation row of a transcript.

    The row holds (seq_start, start_exon_id, seq_end, end_exon_id) from the
    translation table.  If there is no such row, the query is repeated with
    verbose=True and [] is returned.
    """
    qry = ("select seq_start, start_exon_id,  seq_end, end_exon_id "
           + " from translation where transcript_id = %d " % canonical_transcript_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0]

    search_db(cursor, qry, verbose=True)
    return []
Example #15
0
def get_canonical_coordinates(cursor, canonical_transcript_id):
    """Fetch seq_start, start_exon_id, seq_end and end_exon_id of a transcript.

    The values are read from the translation table and returned as the
    first result row.  Without a result, the query is re-run with
    verbose=True and an empty list is returned.
    """
    columns = "select seq_start, start_exon_id,  seq_end, end_exon_id "
    condition = " from translation where transcript_id = %d " % canonical_transcript_id
    qry = columns + condition

    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return []
    return rows[0]
Example #16
0
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Find the orthologues of a compara gene member in one species.

    ortho_type is matched against homology.description; it is one of
    'ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many',
    'possible_ortholog', 'apparent_ortholog_one2one'.

    For every homology of the requested type that gene_member_id takes part
    in, the other members are looked up and those belonging to the genome of
    `species` are kept.  Their stable ids are then passed through
    stable2gene() after switching the cursor to ensembl_db_name[species].

    Returns the resulting list ([] if there are no orthologues).
    """
    # find genome db_id
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database - should be the responsibility of each function
    switch_to_db(cursor, get_compara_name(cursor))

    qry = (
        "select homology.homology_id from homology_member, homology "
        + " where homology_member.gene_member_id =%d " % gene_member_id
        + " and homology.homology_id = homology_member.homology_id "
        + " and  homology.description = '%s' " % ortho_type
    )
    homology_rows = search_db(cursor, qry)
    if not homology_rows:
        return []  # no orthologs here

    ortho_stable_ids = []
    # for each homology id find the other member id
    for homology_row in homology_rows:
        homology_id = homology_row[0]
        switch_to_db(cursor, get_compara_name(cursor))
        qry = (
            "select gene_member_id from homology_member "
            + " where homology_id = %d" % int(homology_id)
            + " and not  gene_member_id = %d" % gene_member_id
        )
        member_rows = search_db(cursor, qry, verbose=False)
        if not member_rows:
            continue

        for member_row in member_rows:
            ortho_id = member_row[0]
            # keep the member only if it belongs to the requested genome
            qry = (
                "select  stable_id  from gene_member  "
                + " where gene_member_id = %d " % ortho_id
                + " and genome_db_id = %d " % genome_db_id
            )
            stable_rows = search_db(cursor, qry, verbose=False)
            if stable_rows:
                ortho_stable_ids.append(stable_rows[0][0])

    if not ortho_stable_ids:
        return ortho_stable_ids

    switch_to_db(cursor, ensembl_db_name[species])
    return map(lambda stable_id: stable2gene(cursor, stable_id), ortho_stable_ids)
Example #17
0
def taxid2name(cursor, tax_id):
    """Return the 'scientific name' entry of the names table for tax_id.

    The cursor is switched to the database reported by get_ncbi_tax_name().
    If no row is found, the query is repeated with verbose=True and "" is
    returned.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = ("select name_txt from names where tax_id= %d " % int(tax_id)
           + " and name_class = 'scientific name'")
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #18
0
def taxid2trivial(cursor, tax_id):
    """Return the 'trivial' name stored in the names table for tax_id.

    The cursor is switched to the database reported by get_ncbi_tax_name().
    If there is no row, or the first row contains 'ERROR', the query is
    repeated with verbose=True and "" is returned.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = ("select name_txt from names where tax_id= %d " % int(tax_id)
           + " and name_class = 'trivial'")
    rows = search_db(cursor, qry)

    failed = not rows or 'ERROR' in rows[0]
    if failed:
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
Example #19
0
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Return the orthologues of gene_member_id that belong to `species`.

    ortho_type selects the homology.description to follow; it is one of
    'ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many',
    'possible_ortholog', 'apparent_ortholog_one2one'.

    The compara database is searched for homologies of that type involving
    gene_member_id; the partner members found in the genome of `species`
    contribute their stable ids.  Finally the cursor is switched to
    ensembl_db_name[species] and each stable id is converted with
    stable2gene().  An empty list means no orthologues were found.
    """
    orthos = []

    # find genome db_id
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database - should be the responsibility of each function
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "".join([
        "select homology.homology_id from homology_member, homology ",
        " where homology_member.gene_member_id =%d " % gene_member_id,
        " and homology.homology_id = homology_member.homology_id ",
        " and  homology.description = '%s' " % ortho_type,
    ])
    homologies = search_db(cursor, qry)
    if not homologies:
        return []  # no orthologs here

    # for each homology id find the other member id
    for homology in homologies:
        homology_id = homology[0]
        switch_to_db(cursor, get_compara_name(cursor))
        qry = "".join([
            "select gene_member_id from homology_member ",
            " where homology_id = %d" % int(homology_id),
            " and not  gene_member_id = %d" % gene_member_id,
        ])
        partners = search_db(cursor, qry, verbose=False)
        if not partners:
            continue

        for partner in partners:
            ortho_id = partner[0]
            qry = "".join([
                "select  stable_id  from gene_member  ",
                " where gene_member_id = %d " % ortho_id,
                " and genome_db_id = %d " % genome_db_id,
            ])
            hits = search_db(cursor, qry, verbose=False)
            if not hits:
                # this partner is not in the requested genome
                continue
            orthos.append(hits[0][0])

    if orthos:
        # translate the stable ids in the species' own database
        switch_to_db(cursor, ensembl_db_name[species])
        orthos = map(lambda stable: stable2gene(cursor, stable), orthos)
    return orthos
Example #20
0
def trivial2taxid(cursor, trivial_name):
    """Return the tax_id whose 'trivial' name equals trivial_name.

    The cursor is switched to the database reported by get_ncbi_tax_name().
    The id is returned as an int.  If there is no row, or the first row
    contains 'ERROR', the query is repeated with verbose=True and "" is
    returned.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = ("select tax_id from names where name_txt= '%s' " % trivial_name
           + " and name_class = 'trivial'")
    rows = search_db(cursor, qry)
    if rows and 'ERROR' not in rows[0]:
        return int(rows[0][0])

    search_db(cursor, qry, verbose=True)
    return ""
Example #21
0
def trivial2taxid(cursor, trivial_name):
    """Translate a trivial species name into its tax_id (as an int).

    The lookup is done in the names table (name_class = 'trivial') of the
    database reported by get_ncbi_tax_name().  Returns "" -- after
    re-running the query with verbose=True -- when there is no row or the
    first row contains 'ERROR'.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    name_condition = "select tax_id from names where name_txt= '%s' " % trivial_name
    class_condition = " and name_class = 'trivial'"
    qry = name_condition + class_condition

    rows = search_db(cursor, qry)
    lookup_failed = not rows or 'ERROR' in rows[0]
    if lookup_failed:
        search_db(cursor, qry, verbose=True)
        return ""
    return int(rows[0][0])
Example #22
0
def taxid2trivial(cursor, tax_id):
    """Translate a tax_id into its trivial name.

    The lookup is done in the names table (name_class = 'trivial') of the
    database reported by get_ncbi_tax_name().  Returns "" -- after
    re-running the query with verbose=True -- when there is no row or the
    first row contains 'ERROR'.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = "select name_txt from names where tax_id= %d " % int(tax_id)
    qry = qry + " and name_class = 'trivial'"

    rows = search_db(cursor, qry)
    if rows and 'ERROR' not in rows[0]:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #23
0
def taxid2name(cursor, tax_id):
    """Translate a tax_id into its scientific name.

    The lookup is done in the names table (name_class = 'scientific name')
    of the database reported by get_ncbi_tax_name().  Returns "" -- after
    re-running the query with verbose=True -- when no row is found.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = "select name_txt from names where tax_id= %d " % int(tax_id)
    qry = qry + " and name_class = 'scientific name'"

    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
Example #24
0
def get_gene_ids(cursor, db_name=None, biotype=None, is_known=None, ref_only=False):

    gene_ids = []

    if db_name:
        qry = "use %s " % db_name
        rows = search_db(cursor, qry)
        if rows:
            rows = search_db(cursor, qry, verbose=True)
            print rows
            exit(1)

    qry = "select gene_id from gene"

    if biotype or not is_known is None:
        qry += " where "
        if biotype:
            qry += "biotype='%s'" % biotype
        if biotype and not is_known is None:
            qry += " and "
        if not is_known is None:
            if is_known:
                qry += "status='known'"
            else:
                qry += "status!='known'"

    rows = search_db(cursor, qry, verbose=False)

    if not rows:
        rows = search_db(cursor, qry, verbose=True)
        return []
    else:
        if "Error" in rows[0]:
            print rows[0]
            return []

        # I don't want to hard code the id for the annotation "non_ref"
        # and I do no know where else to do it, so qe do it here
        if ref_only:
            qry = "select attrib_type_id from attrib_type where code='non_ref'"
            rows2 = search_db(cursor, qry)
            if not rows2 or not type(rows2[0][0]) is int:
                ref_only = False
            else:
                non_ref_id = int(rows2[0][0])

        for row in rows:
            if not type(row[0]) is long:
                print row
                exit(1)
            gene_id = int(row[0])
            if ref_only and not is_reference(cursor, gene_id, non_ref_id):
                continue
            gene_ids.append(gene_id)

    return gene_ids
Example #25
0
def get_gene_ids (cursor, db_name=None, biotype = None, is_known = None, ref_only = False):

   gene_ids = []
    
   if  (db_name):
      qry  = "use %s " % db_name
      rows = search_db (cursor, qry)
      if (rows):
         rows = search_db (cursor, qry, verbose = True)
         print rows
         exit (1)

   qry = "select gene_id from gene"

   if ( biotype or not is_known is None):
      qry +=  " where "
      if ( biotype):
         qry += "biotype='%s'" % biotype
      if (biotype and not is_known is None):
         qry += " and "
      if (not is_known is None):
         if is_known:
            qry += "status='known'"
         else:
            qry += "status!='known'"
                
   rows = search_db (cursor, qry, verbose = False)
    
   if (not rows):
      rows = search_db (cursor, qry, verbose = True)
      return []
   else:
      if ('Error' in rows[0]):
         print rows[0]
         return []

      # I don't want to hard code the id for the annotation "non_ref"
      # and I do no know where else to do it, so qe do it here
      if ref_only:
         qry  = "select attrib_type_id from attrib_type where code='non_ref'"
         rows2 = search_db (cursor, qry)
         if not rows2 or not type(rows2[0][0]) is int:
            ref_only = False
         else:
            non_ref_id = int (rows2[0][0])
                
      for row in rows:
         if ( not type(row[0]) is long ):
            print row
            exit(1)
         gene_id = int(row[0])
         if ref_only and not is_reference(cursor, gene_id, non_ref_id): continue
         gene_ids.append(gene_id)
    
   return gene_ids
Example #26
0
def get_species_shorthand(cursor, species):
    """Return the shorthand stored for a species, or "" if there is none.

    The lookup is done in the species_name_shorthands table after issuing
    "use" for the database named by get_compara_name().
    """
    db_name = get_compara_name(cursor)
    search_db(cursor, "use %s " % db_name)

    qry = "select shorthand from species_name_shorthands where species='%s'" % species
    rows = search_db(cursor, qry)
    return rows[0][0] if rows else ""
Example #27
0
def member2stable(cursor, member_id):
    """Return the stable_id of a gene_member row, or "" if it is not found.

    member_id refers to the compara db, of which we need to have one.  When
    the query yields nothing it is repeated with verbose=True.
    """
    qry = "select  stable_id from gene_member where gene_member_id = %d" % member_id
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #28
0
def member2stable(cursor, member_id):
    """Translate a gene_member_id into the corresponding stable_id.

    member_id refers to the compara db, of which we need to have one.
    Returns "" -- after re-running the query with verbose=True -- when the
    gene_member table has no such row.
    """
    qry = "select  stable_id from gene_member where gene_member_id = %d" % member_id
    found = search_db(cursor, qry)
    if not found:
        search_db(cursor, qry, verbose=True)
        return ""

    first_row = found[0]
    return first_row[0]
Example #29
0
def species2genome_db_id(cursor, species):
    """Return the genome_db_id of a species as an int, or 0 if not found.

    The lookup is done in the genome_db table of the database named by
    get_compara_name().  When the query yields nothing it is repeated with
    verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "select genome_db_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if rows:
        return int(rows[0][0])

    search_db(cursor, qry, verbose=True)
    return 0
Example #30
0
def genome_db_id2species(cursor, genome_db_id):
    """Return the species name of a genome_db_id, or "" if not found.

    The lookup is done in the genome_db table of the database named by
    get_compara_name().  When the query yields nothing it is repeated with
    verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #31
0
def taxid2parentid(cursor, tax_id):
    """Return the parent_tax_id of tax_id as an int, or "" on failure.

    The lookup is done in the nodes table of the database reported by
    get_ncbi_tax_name().  "" is returned when the query yields no rows
    (the query is then repeated with verbose=True) or when the value found
    cannot be converted to an int.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    # Only conversion problems are turned into "": a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    try:
        return int(rows[0][0])
    except (IndexError, TypeError, ValueError):
        return ""
Example #32
0
def taxid2parentid(cursor, tax_id):
    """Look up the parent of a taxonomy node.

    The cursor is switched to the database reported by get_ncbi_tax_name()
    and parent_tax_id is read from the nodes table.

    Returns the parent id as an int.  Returns "" if the query yields no
    rows (it is then re-run with verbose=True) or if the stored value
    cannot be converted to an int.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        retval = int(rows[0][0])
    except (IndexError, TypeError, ValueError):
        # malformed or non-numeric result; anything else (for instance
        # KeyboardInterrupt) is deliberately allowed to propagate
        retval = ""
    return retval
Example #33
0
def species2genome_db_id(cursor, species):
    """Translate a species name into its genome_db_id.

    The cursor is switched to the database named by get_compara_name() and
    the genome_db table is searched by name.  Returns the id as an int, or
    0 -- after re-running the query with verbose=True -- if there is no
    such species.
    """
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "select genome_db_id from genome_db where name = '%s'" % species
    found = search_db(cursor, qry)
    if not found:
        search_db(cursor, qry, verbose=True)
        return 0

    genome_db_id = found[0][0]
    return int(genome_db_id)
Example #34
0
def genome_db_id2species(cursor, genome_db_id):
    """Translate a genome_db_id into the species name.

    The cursor is switched to the database named by get_compara_name() and
    the genome_db table is searched by id.  Returns the name, or "" -- after
    re-running the query with verbose=True -- if there is no such id.
    """
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    found = search_db(cursor, qry)
    if not found:
        search_db(cursor, qry, verbose=True)
        return ""

    species = found[0][0]
    return species
Example #35
0
def stable2member(cursor, stable_id):
    """Return the gene_member_id (as an int) of a stable id.

    member_id refers to the compara db, of which we need to have one.
    Since version 76 the table is called gene_member and the main id is
    gene_member_id (before that: "select  member_id from member where
    stable_id = ...").

    If the query yields no rows, or the first row contains 'ERROR', the
    query is repeated with verbose=True and the process exits with
    status 1.
    """
    qry = "select  gene_member_id from gene_member where stable_id = '%s'" % stable_id
    rows = search_db(cursor, qry)

    if rows and 'ERROR' not in rows[0]:
        return int(rows[0][0])

    search_db(cursor, qry, verbose=True)
    exit(1)
    # only reached if exit() returns instead of terminating the process
    return ""
Example #36
0
def species2taxid(cursor, species):
    """Return the taxon_id of a species as an int, or "" on failure.

    The lookup is done in the genome_db table of the database named by
    get_compara_name().  "" is returned when the query yields no rows (the
    query is then repeated with verbose=True) or when the value found
    cannot be converted to an int.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    # Only conversion problems are turned into "": a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    try:
        return int(rows[0][0])
    except (IndexError, TypeError, ValueError):
        return ""
Example #37
0
def species2taxid(cursor, species):
    """Translate a species name into its taxon_id.

    The cursor is switched to the database named by get_compara_name() and
    the genome_db table is searched by name.

    Returns the taxon id as an int.  Returns "" if the query yields no rows
    (it is then re-run with verbose=True) or if the stored value cannot be
    converted to an int.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        retval = int(rows[0][0])
    except (IndexError, TypeError, ValueError):
        # malformed or non-numeric result; anything else (for instance
        # KeyboardInterrupt) is deliberately allowed to propagate
        retval = ""
    return retval
Example #38
0
def stable2member(cursor, stable_id):
    """Translate a stable id into the compara gene_member_id (an int).

    member_id refers to the compara db, of which we need to have one.
    The old form of the query was
        select  member_id from member where stable_id = '%s'
    but since version 76 the table is called gene_member, and the main id
    is gene_member_id.

    A failed lookup (no rows, or "ERROR" in the first row) re-runs the
    query with verbose=True and then exits with status 1.
    """
    qry = "select  gene_member_id from gene_member where stable_id = '%s'" % stable_id
    rows = search_db(cursor, qry)

    lookup_failed = not rows or "ERROR" in rows[0]
    if lookup_failed:
        search_db(cursor, qry, verbose=True)
        exit(1)
        # only reached if exit() returns instead of terminating the process
        return ""

    member_id = rows[0][0]
    return int(member_id)
Example #39
0
def gene2canon_transl(cursor, gene_id, db_name=None,):
    """Return the translation_id of a gene's canonical transcript.

    The translation and gene tables are joined on
    gene.canonical_transcript_id = translation.transcript_id.

    Returns "" -- after re-running the query with verbose=True -- when no
    row is found, and False if db_name is given but switching to it fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = (
        "select translation.translation_id  from translation, gene "
        + " where gene.canonical_transcript_id = translation.transcript_id "
        + " and gene.gene_id = %d " % gene_id
    )
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #40
0
def gene2stable_canon_transl(cursor, gene_id, db_name=None):
    """Return the stable_id of the translation of a gene's canonical transcript.

    The translation and gene tables are joined on
    gene.canonical_transcript_id = translation.transcript_id.

    Returns "" -- after re-running the query with verbose=True -- when no
    row is found, and False if db_name is given but switching to it fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = (
        "select translation.stable_id  from translation, gene "
        + " where gene.canonical_transcript_id = translation.transcript_id "
        + " and gene.gene_id = %d " % gene_id
    )
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return rows[0][0]

    search_db(cursor, qry, verbose=True)
    return ""
Example #41
0
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Return one exon_seq row, addressed by its exon_seq_id.

    The result is the list [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq]; protein_seq,
    left_flank, right_flank and dna_seq are given as "" when stored as None.

    Returns [] when there is no such row, and False if db_name is given but
    switching to it fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = (
        "select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
        + " left_flank, right_flank, dna_seq  "
        + " from exon_seq where exon_seq_id = %d" % exon_seq_id
    )
    rows = search_db(cursor, qry)
    if not rows:
        return []

    (seq_id, protein_seq, transl_start, transl_end,
     left_flank, right_flank, dna_seq) = rows[0]

    # the four sequence columns are reported as "" rather than None
    protein_seq, left_flank, right_flank, dna_seq = [
        "" if seq is None else seq
        for seq in (protein_seq, left_flank, right_flank, dna_seq)
    ]

    return [seq_id, protein_seq, transl_start, transl_end, left_flank, right_flank, dna_seq]
Example #42
0
def stable2gene(cursor, stable_id=None, db_name=None):
    """Return the gene_id (as an int) that belongs to a stable id.

    Returns 0 if stable_id is empty or if the gene table has no such entry
    (in the latter case the query is repeated with verbose=True).  Returns
    False if db_name is given but switching to it fails.
    """
    if not stable_id:
        return 0

    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = "select gene_id from gene where stable_id='%s'" % stable_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return int(rows[0][0])

    search_db(cursor, qry, verbose=True)
    return 0
Example #43
0
def check_ccds(cursor, transcript_stable_id="", transcript_id=""):
    """Return a CCDS hit name supporting a transcript, or "" if there is none.

    The transcript is identified by transcript_stable_id if that is given,
    otherwise by transcript_id; with neither, "" is returned without
    querying.  The hit names of the dna_align_feature rows that support the
    transcript are scanned, and the last one containing "CCDS" is returned.
    """
    if transcript_stable_id:
        transcript_filter = "   and transcript.stable_id = '%s' " % transcript_stable_id
    elif transcript_id:
        transcript_filter = "   and transcript.transcript_id = '%s' " % transcript_id
    else:
        return ""

    qry = (
        "select dna_align_feature.hit_name "
        + "from dna_align_feature, transcript, transcript_supporting_feature "
        + "   where dna_align_feature.dna_align_feature_id =  transcript_supporting_feature.feature_id "
        + "   and transcript_supporting_feature.feature_type ='dna_align_feature' "
        + "   and transcript_supporting_feature.transcript_id =transcript.transcript_id "
        + transcript_filter
    )
    rows = search_db(cursor, qry)
    if not rows:
        return ""

    # when several hits mention CCDS, the last one wins
    ccds_hits = [row[0] for row in rows if "CCDS" in row[0]]
    return ccds_hits[-1] if ccds_hits else ""
Example #44
0
def get_predicted_exons(cursor, gene_id, species):

    exons = []

    # get the region on the gene
    ret = get_gene_region(cursor, gene_id)
    if ret:
        [gene_seq_id, gene_region_start, gene_region_end, gene_region_strand] = ret
    else:
        print "region not retrived for ", species, gene_id
        return []

    qry = "SELECT  * FROM  prediction_exon  WHERE seq_region_id = %d " % gene_seq_id
    qry += " AND  seq_region_start >= %d AND seq_region_start <= %d " % (gene_region_start, gene_region_end)
    qry += " AND  seq_region_end   >= %d AND seq_region_end   <= %d " % (gene_region_start, gene_region_end)
    rows = search_db(cursor, qry)

    if not rows:
        return []
    for row in rows:
        exon = Exon()
        exon.gene_id = gene_id
        exon.load_from_ensembl_prediction(gene_region_start, gene_region_end, row)
        exons.append(exon)

    return exons
Example #45
0
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Fetch the exon_seq row with the given exon_seq_id.

    Returns [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq], with None replaced
    by "" for protein_seq, left_flank, right_flank and dna_seq.

    Returns [] if no row is found, and False when db_name is given and
    switch_to_db() fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    columns = ("select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
               " left_flank, right_flank, dna_seq  ")
    qry = columns + " from exon_seq where exon_seq_id = %d" % exon_seq_id

    rows = search_db(cursor, qry)
    if not rows:
        return []

    (row_id, protein_seq, pepseq_transl_start, pepseq_transl_end,
     left_flank, right_flank, dna_seq) = rows[0]

    def or_empty(value):
        # NULL sequence columns are handed out as empty strings
        return "" if value is None else value

    return [row_id, or_empty(protein_seq), pepseq_transl_start, pepseq_transl_end,
            or_empty(left_flank), or_empty(right_flank), or_empty(dna_seq)]
Example #46
0
def find_mammals(cursor, trivial_name_list):
    """Return the trivial names whose taxonomic ancestry mentions mammals.

    For each name the taxon id is looked up with trivial2taxid(), and the
    chain of parents (taxid2parentid) is climbed. At every step the
    scientific name of the current taxon is read from the names table:
    a name containing 'mammal' accepts the species, a name containing
    'vertebrat' ends the climb without accepting it.

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list of the names classified as mammals, in input order
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        # the climb starts at the parent of the species' own taxon
        current_id = taxid2parentid(cursor, trivial2taxid(cursor, trivial_name))

        is_mammal = False
        while current_id:
            qry = ("select name_txt from names where tax_id= %d " % int(current_id)
                   + " and name_class = 'scientific name'")
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                scientific_name = rows[0][0].lower()
                if 'mammal' in scientific_name:
                    is_mammal = True
                    break
                if 'vertebrat' in scientific_name:
                    # had this been a mammal, it would have been found by now
                    break

            parent_id = taxid2parentid(cursor, current_id)
            # a missing parent, or a parent id not above 1, ends the climb
            current_id = parent_id if parent_id and parent_id > 1 else None

        if is_mammal:
            mammals.append(trivial_name)

    return mammals
Example #47
0
def check_ccds (cursor, transcript_stable_id = "", transcript_id = ""):
    """Look for a supporting dna_align_feature hit whose name contains 'CCDS'.

    The transcript is selected by transcript_stable_id if given, otherwise
    by transcript_id; with neither, no query is run.

    :param cursor: database cursor
    :param transcript_stable_id: stable id of the transcript (takes precedence)
    :param transcript_id: internal transcript id
    :return: the hit_name of the last returned row containing 'CCDS', or an
             empty string when there is no such row or nothing to query
    """
    ccds = ""

    # choose the transcript filter first; without one there is nothing to ask
    if transcript_stable_id:
        transcript_filter = "   and transcript.stable_id = '%s' " % transcript_stable_id
    elif transcript_id:
        transcript_filter = "   and transcript.transcript_id = '%s' " % transcript_id
    else:
        return ccds

    qry = "".join((
        "select dna_align_feature.hit_name ",
        "from dna_align_feature, transcript, transcript_supporting_feature ",
        "   where dna_align_feature.dna_align_feature_id =  transcript_supporting_feature.feature_id ",
        "   and transcript_supporting_feature.feature_type ='dna_align_feature' ",
        "   and transcript_supporting_feature.transcript_id =transcript.transcript_id ",
        transcript_filter,
    ))
    rows = search_db(cursor, qry)
    if not rows:
        return ccds

    for row in rows:
        hit_name = row[0]
        if 'CCDS' in hit_name:
            # later matches overwrite earlier ones
            ccds = hit_name

    return ccds
Example #48
0
def stable2gene(cursor, stable_id=None, db_name=None):
    """Translate a gene stable id into the integer gene_id.

    :param cursor: database cursor
    :param stable_id: stable id to look up in the gene table
    :param db_name: optional database to switch to before querying
    :return: 0 if stable_id is empty or no row is found; False if switching
             to db_name fails; otherwise gene_id as an int
    """
    if not stable_id:
        return 0

    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    qry = "select gene_id from gene where stable_id='%s'" % stable_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return int(rows[0][0])

    # nothing came back: run the query once more verbosely, then give up
    search_db(cursor, qry, verbose=True)
    return 0
Example #49
0
def get_common_name(cursor, species):
    """Return the 'genbank common name' recorded for a species.

    The taxon id is resolved with species2taxid() after switching to the
    database named by get_compara_name(); the names table is then queried
    after switching to the database named by get_ncbi_tax_name().

    :param cursor: database cursor
    :param species: species name handed to species2taxid()
    :return: name_txt of the first row, or an empty string when there are
             no rows or the first row contains 'ERROR'
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    qry = ("select name_txt from names where "
           + ("tax_id = %d and " % tax_id)
           + "name_class = 'genbank common name'")
    rows = search_db(cursor, qry)

    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # repeat the query verbosely so that the problem gets reported
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
Example #50
0
def get_common_name (cursor, species):
    """Look up the 'genbank common name' of a species in the names table.

    :param cursor: database cursor
    :param species: species name, resolved to a taxon id by species2taxid()
    :return: the first name_txt found; "" if the query gives no rows or the
             first row contains 'ERROR'
    """
    # the taxon id is resolved in the database named by get_compara_name() ...
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)

    # ... and the names table is read from the one named by get_ncbi_tax_name()
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select name_txt from names where tax_id = %d and "
           "name_class = 'genbank common name'") % tax_id
    rows = search_db(cursor, qry)

    common_name = ""
    if rows:
        first_row = rows[0]
        if 'ERROR' in first_row:
            # rerun verbosely for diagnostics; the answer stays empty
            search_db(cursor, qry, verbose=True)
        else:
            common_name = first_row[0]
    return common_name
Example #51
0
def get_gene_coordinates(cursor, gene_id, db_name=None):
    """Read the seq_region coordinates of a gene from the gene table.

    :param cursor: database cursor
    :param gene_id: integer gene id, formatted into the query with %d
    :param db_name: optional database to switch to before querying
    :return: the first row (seq_region_id, seq_region_start, seq_region_end,
             seq_region_strand), or None if the database switch fails, the
             query gives no rows, or the first entry is a string
             containing 'error' (any case)
    """
    if db_name and not switch_to_db(cursor, db_name):
        return None

    qry = ("select seq_region_id, seq_region_start, seq_region_end, seq_region_strand  "
           + " from gene "
           + " where gene_id = %d" % gene_id)
    rows = search_db(cursor, qry)

    failed = not rows
    if not failed:
        first = rows[0]
        # a string (rather than a row) mentioning 'error' counts as failure
        failed = isinstance(first, str) and "error" in first.lower()

    if failed:
        search_db(cursor, qry, verbose=True)
        return None

    return rows[0]
Example #52
0
def get_predicted_exons (cursor, gene_id, species):

    exons = []

    # get the region on the gene
    ret = get_gene_region (cursor, gene_id)
    if  ret:
        [gene_seq_id, gene_region_start, gene_region_end, 
         gene_region_strand] = ret
    else:
        print "region not retrived for ", species, gene_id
        return []

    qry    = "SELECT  * FROM  prediction_exon  WHERE seq_region_id = %d "  %  gene_seq_id
    qry   += " AND  seq_region_start >= %d AND seq_region_start <= %d " %  \
        (gene_region_start, gene_region_end)
    qry   += " AND  seq_region_end   >= %d AND seq_region_end   <= %d " %  \
        (gene_region_start, gene_region_end)
    rows   = search_db (cursor, qry)

    if (not rows):
        return []
    for row in rows:
        exon         = Exon()
        exon.gene_id = gene_id
        exon.load_from_ensembl_prediction (gene_region_start, gene_region_end, row)
        exons.append(exon)
 
    return exons
Example #53
0
def find_mammals(cursor, trivial_name_list):
    """Filter a list of trivial species names down to the mammals.

    A species counts as a mammal when, climbing from the parent of its
    taxon via taxid2parentid(), a scientific name containing 'mammal' is
    met before one containing 'vertebrat' and before the climb runs out.

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list with the names recognised as mammals
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        own_tax_id = trivial2taxid (cursor, trivial_name)
        ancestor   = taxid2parentid (cursor, own_tax_id)

        found_mammal = False
        while ancestor:
            qry  = "select name_txt from names where tax_id= %d " % int(ancestor)
            qry += " and name_class = 'scientific name'"
            rows = search_db (cursor, qry)
            name = rows[0][0] if rows else None
            if name:
                lowered = name.lower()
                found_mammal = 'mammal' in lowered
                # reaching the vertebrates without a hit means it is not a mammal
                if found_mammal or 'vertebrat' in lowered:
                    break

            next_id = taxid2parentid (cursor, ancestor)
            if not next_id or not (next_id > 1):
                ancestor = None
            else:
                ancestor = next_id

        if found_mammal:
            mammals.append(trivial_name)

    return mammals
Example #54
0
def get_gene_coordinates (cursor, gene_id, db_name=None):
    """Return the first gene-table row with the seq_region coordinates.

    :param cursor: database cursor
    :param gene_id: integer gene id
    :param db_name: optional database to switch to first
    :return: row with seq_region_id, seq_region_start, seq_region_end and
             seq_region_strand; None when switching the database fails,
             nothing is returned, or the first entry is a string that
             contains 'error' in any letter case
    """
    if db_name:
        switched = switch_to_db(cursor, db_name)
        if not switched:
            return None

    fragments = ["select seq_region_id, seq_region_start, seq_region_end, seq_region_strand  ",
                 " from gene ",
                 " where gene_id = %d" % gene_id]
    qry  = "".join(fragments)
    rows = search_db (cursor, qry)

    if rows:
        head_row = rows[0]
        looks_like_error = isinstance(head_row, str) and 'error' in head_row.lower()
        if not looks_like_error:
            return head_row

    # no usable answer: repeat the query verbosely before giving up
    search_db (cursor, qry, verbose = True)
    return None
Example #55
0
def get_canonical_transcript_id (cursor, gene_id, db_name=None):
    """Return gene.canonical_transcript_id for the given gene.

    :param cursor: database cursor
    :param gene_id: integer gene id, formatted into the query with %d
    :param db_name: optional database to switch to before querying
    :return: the canonical transcript id from the first row; "" if the
             query gives no rows or reports an error; [] if switching to
             db_name fails (the list is kept for existing callers)
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []

    qry  = "select canonical_transcript_id"
    qry += " from  gene where gene_id=%d" % gene_id
    rows = search_db (cursor, qry, verbose=False)

    if not rows:
        # repeat verbosely so that the empty result gets reported
        search_db (cursor, qry, verbose=True)
        return ""

    first = rows[0]
    # Same test as get_gene_coordinates: a string entry mentioning 'error' in
    # any letter case is a failure report, not a data row.
    # NOTE(review): presumably this is how search_db signals errors -- confirm.
    if isinstance(first, str) and 'error' in first.lower():
        print(first)
        return ""

    return first[0]
Example #56
0
def get_gene_region (cursor, gene_id, is_known=None):
    """Return the seq_region placement of a gene.

    :param cursor: database cursor
    :param gene_id: integer gene id, formatted into the query with %d
    :param is_known: when truthy, the query is restricted to status='known'
    :return: first row (seq_region_id, seq_region_start, seq_region_end,
             seq_region_strand), or [] if the query gives no rows or
             reports an error
    """
    qry  = "select seq_region_id, seq_region_start, seq_region_end, "
    qry += " seq_region_strand "
    qry += " from  gene  where  gene_id=%d" % gene_id
    if is_known:
        qry += " and  status='known' "
    rows = search_db (cursor, qry, verbose=False)

    if not rows:
        # repeat verbosely so that the empty result gets reported
        search_db (cursor, qry, verbose=True)
        return []

    first = rows[0]
    # Same test as get_gene_coordinates: a string entry mentioning 'error' in
    # any letter case is a failure report, not a data row.
    # NOTE(review): presumably this is how search_db signals errors -- confirm.
    if isinstance(first, str) and 'error' in first.lower():
        print(first)
        return []

    return first
Example #57
0
def get_compara_name (cursor):
    """Find the name of the compara database for the configured release.

    The release number is read from exolocator_config.parameter
    ('ensembl_release_number'); the process exits with status 1 when it is
    missing. The first database whose name matches '%compara_<release>%'
    is returned.

    :param cursor: database cursor
    :return: database name, or "" if no matching database is found
    """
    release_qry  = "select value from exolocator_config.parameter where name = 'ensembl_release_number'"
    release_rows = search_db(cursor, release_qry)
    release_known = release_rows and 'error' not in release_rows[0][0].lower()
    if not release_known:
        print('ensembl_release_number not set in exolocator_config')
        exit(1)
    release_number = release_rows[0][0]

    db_qry  = "show databases like '%compara_{0}%'".format(release_number)
    db_rows = search_db (cursor, db_qry)
    if db_rows:
        return db_rows[0][0]

    # nothing matched: repeat the query verbosely for diagnostics
    search_db (cursor, db_qry, verbose = True)
    return ""
Example #58
0
def get_gene_region(cursor, gene_id, is_known=None):
    """Fetch the seq_region coordinates and strand of a gene.

    :param cursor: database cursor
    :param gene_id: integer gene id, formatted into the query with %d
    :param is_known: when truthy, only genes with status='known' qualify
    :return: the first row (seq_region_id, seq_region_start,
             seq_region_end, seq_region_strand); [] when the query gives
             no rows or reports an error
    """
    qry = ("select seq_region_id, seq_region_start, seq_region_end, "
           + " seq_region_strand "
           + " from  gene  where  gene_id=%d" % gene_id)
    if is_known:
        qry += " and  status='known' "

    rows = search_db(cursor, qry, verbose=False)
    if not rows:
        # run it again verbosely so the empty result is reported
        search_db(cursor, qry, verbose=True)
        return []

    region = rows[0]
    # Case-insensitive, string-only error test, as in get_gene_coordinates, so
    # that an 'ERROR ...' / 'error ...' report is not returned as a region.
    # NOTE(review): presumably search_db reports failures as a string -- confirm.
    if isinstance(region, str) and "error" in region.lower():
        print(region)
        return []

    return region