def build(self, cursor):
    """Build the taxonomy tree that connects the registered leaves.

    Every leaf gets its tax id from species2taxid() and is stored in
    self.node.  From each leaf the code then climbs towards the root using
    taxid2parentid(), creating a Node for every ancestor not seen before and
    stopping as soon as an already known ancestor is reached.  Afterwards the
    root's __cleanup__() is called, childless non-leaf nodes are dropped from
    self.node and __set_parent_ids__() is run from the root.

    :param cursor: database cursor passed on to all lookup helpers
    """
    switch_to_db(cursor, get_compara_name(cursor))
    for leaf in self.leafs:
        leaf.tax_id = species2taxid(cursor, leaf.name)
        leaf.is_leaf = True
        self.node[leaf.tax_id] = leaf

    # build the tree using ncbi_tax.nodes
    # fill in the names using ncbi_tax.names
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    for leaf in self.leafs:
        parent_id = taxid2parentid(cursor, leaf.tax_id)
        leaf.parent_id = parent_id
        current_id = leaf.tax_id
        # move to the root
        while current_id:
            current_node = self.node[current_id]
            # NOTE(review): the lookup is keyed on parent_id, not current_id,
            # so on the first pass this yields the parent of the leaf's
            # parent -- confirm that skipping the direct parent is intended.
            parent_id = taxid2parentid(cursor, parent_id)
            if not parent_id or current_id == parent_id:
                current_node.is_root = True
                self.root = self.node[current_id]
                current_id = None
            elif parent_id in self.node:
                # the parent exists already: hook up and stop climbing
                self.node[parent_id].children.append(current_node)
                current_id = None
            else:
                # make new node
                parent_node = Node(taxid2name(cursor, parent_id))
                parent_node.tax_id = parent_id
                # grampa:
                parent_node.parent_id = taxid2parentid(cursor, parent_id)
                parent_node.children.append(current_node)
                self.node[parent_id] = parent_node
                # continue the climb from the freshly created parent
                current_id = parent_id

    # shortcircuit nodes with a single child
    new_root = self.root.__cleanup__()
    if new_root:
        new_root.is_root = True
        self.root = new_root

    # collect first, delete afterwards: the dict must not change size
    # while it is being iterated
    del_ids = [node_id for node_id, node in self.node.items()
               if not node.is_leaf and not node.children]
    for node_id in del_ids:
        del self.node[node_id]

    self.__set_parent_ids__(self.root)
def find_mammals(cursor, trivial_name_list):
    """Return the names from *trivial_name_list* that have a mammalian ancestor.

    For each trivial name the taxonomy is climbed, starting at the parent of
    the name's tax id.  The climb stops with a positive answer at the first
    ancestor whose scientific name contains 'mammal', and with a negative
    answer at the first one containing 'vertebrat' or when the parent id is
    empty or not greater than 1.

    Names that trivial2taxid() cannot resolve (it returns "") are skipped;
    previously they made taxid2parentid() fail on int("").

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list of the names recognised as mammals, in input order
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        tax_id = trivial2taxid(cursor, trivial_name)
        if not tax_id:
            # unknown trivial name: nothing to climb from
            continue
        tax_id = taxid2parentid(cursor, tax_id)
        is_mammal = False
        while tax_id:
            qry = "select name_txt from names where tax_id= %d " % int(tax_id)
            qry += " and name_class = 'scientific name'"
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                sci_name = rows[0][0].lower()
                if 'mammal' in sci_name:
                    is_mammal = True
                    break
                elif 'vertebrat' in sci_name:
                    # if the thing was a mammal, we would have found it by now
                    break
            parent_id = taxid2parentid(cursor, tax_id)
            tax_id = parent_id if (parent_id and parent_id > 1) else None
        if is_mammal:
            mammals.append(trivial_name)
    return mammals
def find_mammals(cursor, trivial_name_list):
    """Return the names from *trivial_name_list* that have a mammalian ancestor.

    For each trivial name the taxonomy is climbed, starting at the parent of
    the name's tax id.  The climb stops with a positive answer at the first
    ancestor whose scientific name contains 'mammal', and with a negative
    answer at the first one containing 'vertebrat' or when the parent id is
    empty or not greater than 1.

    Names that trivial2taxid() cannot resolve (it returns "") are skipped;
    previously they made taxid2parentid() fail on int("").

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list of the names recognised as mammals, in input order
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        tax_id = trivial2taxid(cursor, trivial_name)
        if not tax_id:
            # unknown trivial name: nothing to climb from
            continue
        tax_id = taxid2parentid(cursor, tax_id)
        is_mammal = False
        while tax_id:
            qry = "select name_txt from names where tax_id= %d " % int(tax_id)
            qry += " and name_class = 'scientific name'"
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                sci_name = rows[0][0].lower()
                if 'mammal' in sci_name:
                    is_mammal = True
                    break
                elif 'vertebrat' in sci_name:
                    # if the thing was a mammal, we would have found it by now
                    break
            parent_id = taxid2parentid(cursor, tax_id)
            tax_id = parent_id if (parent_id and parent_id > 1) else None
        if is_mammal:
            mammals.append(trivial_name)
    return mammals
def build(self, cursor):
    """Build the taxonomy tree that connects the registered leaves.

    Every leaf gets its tax id from species2taxid() and is stored in
    self.node.  From each leaf the code then climbs towards the root using
    taxid2parentid(), creating a Node for every ancestor not seen before and
    stopping as soon as an already known ancestor is reached.  Afterwards the
    root's __cleanup__() is called, childless non-leaf nodes are dropped from
    self.node and __set_parent_ids__() is run from the root.

    :param cursor: database cursor passed on to all lookup helpers
    """
    switch_to_db(cursor, get_compara_name(cursor))
    for leaf in self.leafs:
        leaf.tax_id = species2taxid(cursor, leaf.name)
        leaf.is_leaf = True
        self.node[leaf.tax_id] = leaf

    # build the tree using ncbi_tax.nodes
    # fill in the names using ncbi_tax.names
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    for leaf in self.leafs:
        parent_id = taxid2parentid(cursor, leaf.tax_id)
        leaf.parent_id = parent_id
        current_id = leaf.tax_id
        # move to the root
        while current_id:
            current_node = self.node[current_id]
            # NOTE(review): the lookup is keyed on parent_id, not current_id,
            # so on the first pass this yields the parent of the leaf's
            # parent -- confirm that skipping the direct parent is intended.
            parent_id = taxid2parentid(cursor, parent_id)
            if not parent_id or current_id == parent_id:
                current_node.is_root = True
                self.root = self.node[current_id]
                current_id = None
            elif parent_id in self.node:
                # the parent exists already: hook up and stop climbing
                self.node[parent_id].children.append(current_node)
                current_id = None
            else:
                # make new node
                parent_node = Node(taxid2name(cursor, parent_id))
                parent_node.tax_id = parent_id
                # grampa:
                parent_node.parent_id = taxid2parentid(cursor, parent_id)
                parent_node.children.append(current_node)
                self.node[parent_id] = parent_node
                # continue the climb from the freshly created parent
                current_id = parent_id

    # shortcircuit nodes with a single child
    new_root = self.root.__cleanup__()
    if new_root:
        new_root.is_root = True
        self.root = new_root

    # collect first, delete afterwards: the dict must not change size
    # while it is being iterated
    del_ids = [node_id for node_id, node in self.node.items()
               if not node.is_leaf and not node.children]
    for node_id in del_ids:
        del self.node[node_id]

    self.__set_parent_ids__(self.root)
def add(self, cursor, name):
    """Create a leaf Node called *name* and register it with this tree.

    The new node is appended to self.leafs and stored in self.node under
    the tax id returned by species2taxid().
    """
    new_leaf = Node(name)
    # the tax id is looked up in the compara database
    switch_to_db(cursor, get_compara_name(cursor))
    new_leaf.tax_id = species2taxid(cursor, new_leaf.name)
    new_leaf.is_leaf = True
    self.leafs.append(new_leaf)
    self.node[new_leaf.tax_id] = new_leaf
def add(self, cursor, name):
    """Create a leaf Node called *name* and register it with this tree.

    The new node is appended to self.leafs and stored in self.node under
    the tax id returned by species2taxid().
    """
    new_leaf = Node(name)
    # the tax id is looked up in the compara database
    switch_to_db(cursor, get_compara_name(cursor))
    new_leaf.tax_id = species2taxid(cursor, new_leaf.name)
    new_leaf.is_leaf = True
    self.leafs.append(new_leaf)
    self.node[new_leaf.tax_id] = new_leaf
def taxid2name(cursor, tax_id):
    """Return the 'scientific name' entry of the names table for *tax_id*.

    Returns "" when the query yields no rows; in that case the query is
    repeated once with verbose=True before giving up.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select name_txt from names where tax_id= %d "
           " and name_class = 'scientific name'") % int(tax_id)
    hits = search_db(cursor, qry)
    if hits:
        return hits[0][0]
    # nothing found: rerun verbosely so the problem shows up in the output
    search_db(cursor, qry, verbose=True)
    return ""
def trivial2taxid(cursor, trivial_name):
    """Return the tax id (int) stored for *trivial_name* with name_class 'trivial'.

    Returns "" when the query yields no rows or the first row contains
    'ERROR'; the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select tax_id from names where name_txt= '%s' "
           " and name_class = 'trivial'") % trivial_name
    hits = search_db(cursor, qry)
    if hits and 'ERROR' not in hits[0]:
        return int(hits[0][0])
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def taxid2trivial(cursor, tax_id):
    """Return the 'trivial' entry of the names table for *tax_id*.

    Returns "" when the query yields no rows or the first row contains
    'ERROR'; the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select name_txt from names where tax_id= %d "
           " and name_class = 'trivial'") % int(tax_id)
    hits = search_db(cursor, qry)
    if hits and 'ERROR' not in hits[0]:
        return hits[0][0]
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def trivial2taxid(cursor, trivial_name):
    """Return the tax id (int) stored for *trivial_name* with name_class 'trivial'.

    Returns "" when the query yields no rows or the first row contains
    'ERROR'; the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select tax_id from names where name_txt= '%s' "
           " and name_class = 'trivial'") % trivial_name
    hits = search_db(cursor, qry)
    if hits and 'ERROR' not in hits[0]:
        return int(hits[0][0])
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def taxid2name(cursor, tax_id):
    """Return the 'scientific name' entry of the names table for *tax_id*.

    Returns "" when the query yields no rows; in that case the query is
    repeated once with verbose=True before giving up.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select name_txt from names where tax_id= %d "
           " and name_class = 'scientific name'") % int(tax_id)
    hits = search_db(cursor, qry)
    if hits:
        return hits[0][0]
    # nothing found: rerun verbosely so the problem shows up in the output
    search_db(cursor, qry, verbose=True)
    return ""
def taxid2trivial(cursor, tax_id):
    """Return the 'trivial' entry of the names table for *tax_id*.

    Returns "" when the query yields no rows or the first row contains
    'ERROR'; the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = ("select name_txt from names where tax_id= %d "
           " and name_class = 'trivial'") % int(tax_id)
    hits = search_db(cursor, qry)
    if hits and 'ERROR' not in hits[0]:
        return hits[0][0]
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def species2genome_db_id(cursor, species):
    """Return the genome_db_id (int) of the genome_db row named *species*.

    Returns 0 when the query yields no rows; the query is then repeated
    once with verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select genome_db_id from genome_db where name = '%s'" % species
    hits = search_db(cursor, qry)
    if hits:
        return int(hits[0][0])
    # no match: rerun verbosely for diagnostics, then report "not found"
    search_db(cursor, qry, verbose=True)
    return 0
def genome_db_id2species(cursor, genome_db_id):
    """Return the name column of the genome_db row with id *genome_db_id*.

    Returns "" when the query yields no rows; the query is then repeated
    once with verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    hits = search_db(cursor, qry)
    if hits:
        return hits[0][0]
    # no match: rerun verbosely for diagnostics, then report "not found"
    search_db(cursor, qry, verbose=True)
    return ""
def taxid2parentid(cursor, tax_id):
    """Return the parent_tax_id (int) stored in the nodes table for *tax_id*.

    Returns "" when the query yields no rows (after repeating it once with
    verbose=True) or when the stored value cannot be converted to int.

    :param cursor: database cursor
    :param tax_id: tax id; must be convertible with int()
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # NULL, non-numeric or missing value; only the conversion errors are
        # caught so that e.g. KeyboardInterrupt is no longer swallowed
        return ""
def taxid2parentid(cursor, tax_id):
    """Return the parent_tax_id (int) stored in the nodes table for *tax_id*.

    Returns "" when the query yields no rows (after repeating it once with
    verbose=True) or when the stored value cannot be converted to int.

    :param cursor: database cursor
    :param tax_id: tax id; must be convertible with int()
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # NULL, non-numeric or missing value; only the conversion errors are
        # caught so that e.g. KeyboardInterrupt is no longer swallowed
        return ""
def genome_db_id2species(cursor, genome_db_id):
    """Return the name column of the genome_db row with id *genome_db_id*.

    Returns "" when the query yields no rows; the query is then repeated
    once with verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    hits = search_db(cursor, qry)
    if hits:
        return hits[0][0]
    # no match: rerun verbosely for diagnostics, then report "not found"
    search_db(cursor, qry, verbose=True)
    return ""
def species2genome_db_id(cursor, species):
    """Return the genome_db_id (int) of the genome_db row named *species*.

    Returns 0 when the query yields no rows; the query is then repeated
    once with verbose=True.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select genome_db_id from genome_db where name = '%s'" % species
    hits = search_db(cursor, qry)
    if hits:
        return int(hits[0][0])
    # no match: rerun verbosely for diagnostics, then report "not found"
    search_db(cursor, qry, verbose=True)
    return 0
def species2taxid(cursor, species):
    """Return the taxon_id (int) of the genome_db row named *species*.

    Returns "" when the query yields no rows (after repeating it once with
    verbose=True) or when the stored value cannot be converted to int.

    :param cursor: database cursor
    :param species: species name as stored in genome_db.name
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # NULL, non-numeric or missing value; only the conversion errors are
        # caught so that e.g. KeyboardInterrupt is no longer swallowed
        return ""
def species2taxid(cursor, species):
    """Return the taxon_id (int) of the genome_db row named *species*.

    Returns "" when the query yields no rows (after repeating it once with
    verbose=True) or when the stored value cannot be converted to int.

    :param cursor: database cursor
    :param species: species name as stored in genome_db.name
    """
    switch_to_db(cursor, get_compara_name(cursor))
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # NULL, non-numeric or missing value; only the conversion errors are
        # caught so that e.g. KeyboardInterrupt is no longer swallowed
        return ""
def get_canonical_coding_exons(cursor, gene_id, db_name=None):
    """Return the coding, canonical exons of a gene that lie between the
    exons in which the canonical translation starts and ends.

    The exons come from gene2exon_list(), are filtered on is_coding and
    is_canonical and ordered by start_in_gene.  The bracketing exon ids are
    taken from get_canonical_coordinates() for the gene's canonical
    transcript.

    :param cursor: database cursor
    :param gene_id: gene identifier passed to the lookup helpers
    :param db_name: optional database to switch to first
    :return: list of exons (both bracketing exons included); [] whenever any
             piece of information is missing or the database switch fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []

    all_exons = gene2exon_list(cursor, gene_id)
    if not all_exons:
        return []

    # an exon can be coding, but not in the canonical version of the transcript
    exons = sorted((exon for exon in all_exons
                    if exon.is_coding and exon.is_canonical),
                   key=lambda exon: exon.start_in_gene)
    if not exons:
        return []

    # is there info about the beginning and the end of canonical translation?
    canonical_transcript_id = get_canonical_transcript_id(cursor, gene_id, db_name=None)
    if not canonical_transcript_id:
        return []
    ret = get_canonical_coordinates(cursor, canonical_transcript_id)
    if not ret or len(ret) != 4:
        return []
    [canonical_start_in_exon, canonical_start_exon_id,
     canonical_end_in_exon, canonical_end_exon_id] = ret
    if canonical_start_exon_id is None or canonical_end_exon_id is None:
        return []

    # keep the exons that are within the start and end bracket
    canonical_exons = []
    reading = False
    for exon in exons:
        is_start = exon.exon_id == canonical_start_exon_id
        is_end = exon.exon_id == canonical_end_exon_id
        if is_start or is_end:
            canonical_exons.append(exon)
            # when translation starts and ends in the same exon the bracket
            # opens and closes at once; toggling only once (as before) left
            # it open and swept in every exon that followed
            if not (is_start and is_end):
                reading = not reading
        elif reading:
            canonical_exons.append(exon)
    return canonical_exons
def get_common_name(cursor, species):
    """Return the 'genbank common name' entry of the names table for *species*.

    The species is first translated to a tax id with species2taxid().

    :param cursor: database cursor
    :param species: species name understood by species2taxid()
    :return: the common name, or "" when the species has no tax id, the
             query yields no rows, or the first row contains 'ERROR'
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    if not tax_id:
        # species2taxid() returns "" on failure, which "%d" cannot format
        return ""

    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select name_txt from names where "
    qry += "tax_id = %d and " % tax_id
    qry += "name_class = 'genbank common name'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
def get_common_name(cursor, species):
    """Return the 'genbank common name' entry of the names table for *species*.

    The species is first translated to a tax id with species2taxid().

    :param cursor: database cursor
    :param species: species name understood by species2taxid()
    :return: the common name, or "" when the species has no tax id, the
             query yields no rows, or the first row contains 'ERROR'
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    if not tax_id:
        # species2taxid() returns "" on failure, which "%d" cannot format
        return ""

    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select name_txt from names where "
    qry += "tax_id = %d and " % tax_id
    qry += "name_class = 'genbank common name'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
def get_exon_pepseq(cursor, exon, db_name=None, verbose=False):
    """Return the protein_seq stored in exon_seq for *exon*.

    The row is selected by exon.exon_seq_id when that is set, otherwise by
    the pair (exon.exon_id, exon.is_known).  Returns "" when no row is found
    or the stored sequence is NULL, and False when switching to *db_name*
    fails.  With verbose=True a failed query is repeated verbosely and its
    result printed.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    if exon.exon_seq_id:
        condition = "exon_seq_id = %d" % exon.exon_seq_id
    else:
        condition = "exon_id = %d and is_known = %d" % (exon.exon_id, exon.is_known)
    qry = "select protein_seq  from exon_seq where " + condition

    rows = search_db(cursor, qry)
    if rows:
        pepseq = rows[0][0]
        return "" if pepseq is None else pepseq

    if verbose:
        print(search_db(cursor, qry, verbose=True))
    return ""
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):
    """Return the gene2exon rows of a gene as a list of Exon objects.

    Rows for which Exon.load_from_gene2exon() returns a false value are
    left out.

    :param cursor: database cursor
    :param gene_id: integer gene id
    :param db_name: optional database to switch to first
    :param verbose: when True, a query without results is diagnosed by
                    printing the current database and a verbose rerun
    :return: list of Exon objects; [] when the gene has no rows; False when
             switching to *db_name* fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)
    if not rows:
        if verbose:
            # the diagnostic queries are only worth a round trip when their
            # output is actually going to be shown
            db_rows = search_db(cursor, "select database()")
            if db_rows:
                print("database  %s" % (db_rows[0][0],))
            print(search_db(cursor, qry, verbose=True))
        return []

    exons = []
    for row in rows:
        exon = Exon()
        if exon.load_from_gene2exon(row):
            exons.append(exon)
    return exons
def get_exon_pepseq(cursor, exon, db_name=None, verbose=False):
    """Return the protein_seq stored in exon_seq for *exon*.

    The row is selected by exon.exon_seq_id when that is set, otherwise by
    the pair (exon.exon_id, exon.is_known).  Returns "" when no row is found
    or the stored sequence is NULL, and False when switching to *db_name*
    fails.  With verbose=True a failed query is repeated verbosely and its
    result printed.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    if exon.exon_seq_id:
        condition = "exon_seq_id = %d" % exon.exon_seq_id
    else:
        condition = "exon_id = %d and is_known = %d" % (exon.exon_id, exon.is_known)
    qry = "select protein_seq  from exon_seq where " + condition

    rows = search_db(cursor, qry)
    if rows:
        pepseq = rows[0][0]
        return "" if pepseq is None else pepseq

    if verbose:
        print(search_db(cursor, qry, verbose=True))
    return ""
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Load one exon by id and return it as an Exon object.

    is_known == 2 reads the sw_exon table and is_known == 3 the usearch_exon
    table (both through Exon.load_from_novel_exon); any other value reads
    gene2exon, additionally filtered on is_known when that value is truthy.
    An Exon on which no load method was called is returned when the
    database switch fails or no row matches.
    """
    exon = Exon()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    if is_known == 2:
        novel_table = "sw_exon"
    elif is_known == 3:
        novel_table = "usearch_exon"
    else:
        novel_table = None

    if novel_table:
        qry = "select * from " + novel_table + " where exon_id = %d" % exon_id
    else:
        qry = "select * from gene2exon where exon_id = %d" % exon_id
        # NOTE(review): is_known == 0 is falsy and so is not used as a
        # filter here -- confirm that this is intended
        if is_known:
            qry += " and is_known = %s " % is_known

    rows = search_db(cursor, qry, verbose=False)
    if not rows:
        return exon

    if novel_table:
        exon.load_from_novel_exon(rows[0], novel_table)
    else:
        exon.load_from_gene2exon(rows[0])
    return exon
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):
    """Return the gene2exon rows of a gene as a list of Exon objects.

    Rows for which Exon.load_from_gene2exon() returns a false value are
    left out.

    :param cursor: database cursor
    :param gene_id: integer gene id
    :param db_name: optional database to switch to first
    :param verbose: when True, a query without results is diagnosed by
                    printing the current database and a verbose rerun
    :return: list of Exon objects; [] when the gene has no rows; False when
             switching to *db_name* fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = "select * from gene2exon where gene_id = %d " % gene_id
    rows = search_db(cursor, qry)
    if not rows:
        if verbose:
            # the diagnostic queries are only worth a round trip when their
            # output is actually going to be shown
            db_rows = search_db(cursor, "select database()")
            if db_rows:
                print("database  %s" % (db_rows[0][0],))
            print(search_db(cursor, qry, verbose=True))
        return []

    exons = []
    for row in rows:
        exon = Exon()
        if exon.load_from_gene2exon(row):
            exons.append(exon)
    return exons
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Fetch one exon_seq row by its exon_seq_id.

    Returns the list [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq] in which NULL
    sequences and flanks are replaced by "".  Returns [] when no row is
    found and False when switching to *db_name* fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = ("select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
           " left_flank, right_flank, dna_seq "
           " from exon_seq where exon_seq_id = %d") % exon_seq_id
    rows = search_db(cursor, qry)
    if not rows:
        return []

    (seq_id, protein_seq, transl_start, transl_end,
     left_flank, right_flank, dna_seq) = rows[0]
    return [seq_id,
            "" if protein_seq is None else protein_seq,
            transl_start,
            transl_end,
            "" if left_flank is None else left_flank,
            "" if right_flank is None else right_flank,
            "" if dna_seq is None else dna_seq]
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Fetch one exon_seq row by its exon_seq_id.

    Returns the list [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq] in which NULL
    sequences and flanks are replaced by "".  Returns [] when no row is
    found and False when switching to *db_name* fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    qry = ("select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
           " left_flank, right_flank, dna_seq "
           " from exon_seq where exon_seq_id = %d") % exon_seq_id
    rows = search_db(cursor, qry)
    if not rows:
        return []

    (seq_id, protein_seq, transl_start, transl_end,
     left_flank, right_flank, dna_seq) = rows[0]
    return [seq_id,
            "" if protein_seq is None else protein_seq,
            transl_start,
            transl_end,
            "" if left_flank is None else left_flank,
            "" if right_flank is None else right_flank,
            "" if dna_seq is None else dna_seq]
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Load one exon by id and return it as an Exon object.

    is_known == 2 reads the sw_exon table and is_known == 3 the usearch_exon
    table (both through Exon.load_from_novel_exon); any other value reads
    gene2exon, additionally filtered on is_known when that value is truthy.
    An Exon on which no load method was called is returned when the
    database switch fails or no row matches.
    """
    exon = Exon()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    if is_known == 2:
        novel_table = "sw_exon"
    elif is_known == 3:
        novel_table = "usearch_exon"
    else:
        novel_table = None

    if novel_table:
        qry = "select * from " + novel_table + " where exon_id = %d" % exon_id
    else:
        qry = "select * from gene2exon where exon_id = %d" % exon_id
        # NOTE(review): is_known == 0 is falsy and so is not used as a
        # filter here -- confirm that this is intended
        if is_known:
            qry += " and is_known = %s " % is_known

    rows = search_db(cursor, qry, verbose=False)
    if not rows:
        return exon

    if novel_table:
        exon.load_from_novel_exon(rows[0], novel_table)
    else:
        exon.load_from_gene2exon(rows[0])
    return exon
def get_maps(cursor, ensembl_db_name, exon_id, is_known):
    """Return the exon_map rows of an exon as a list of Map objects.

    The query runs against the database stored under the 'homo_sapiens'
    key of *ensembl_db_name*.  Returns [] when there are no rows or the
    first row contains "ERROR".
    """
    switch_to_db(cursor, ensembl_db_name['homo_sapiens'])
    qry = "select * from exon_map where exon_id = %d " % exon_id
    qry += " and exon_known = %d " % is_known
    rows = search_db(cursor, qry)
    if not rows or "ERROR" in rows[0]:
        return []

    maps = []
    for row in rows:
        exon_map = Map()
        exon_map.load_from_db(row, cursor)
        maps.append(exon_map)
    return maps
def get_pepseq_transl_range(cursor, exon_id, exon_known, db_name=None):
    """Return the (pepseq_transl_start, pepseq_transl_end) row of an exon.

    :param cursor: database cursor
    :param exon_id: exon id; must be convertible with int()
    :param exon_known: value matched against exon_seq.is_known; must be
                       convertible with int()
    :param db_name: optional database to switch to first
    :return: the first matching row, or [] when nothing matches or the
             database switch fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []
    qry = "select pepseq_transl_start, pepseq_transl_end from exon_seq "
    qry += "where exon_id = %d " % int(exon_id)
    qry += "and is_known = %d " % int(exon_known)
    # the query was built but never executed before, so the check below
    # raised NameError on the unassigned name 'rows'
    rows = search_db(cursor, qry)
    if not rows:
        return []
    return rows[0]
def get_pepseq_transl_range(cursor, exon_id, exon_known, db_name=None):
    """Return the (pepseq_transl_start, pepseq_transl_end) row of an exon.

    :param cursor: database cursor
    :param exon_id: exon id; must be convertible with int()
    :param exon_known: value matched against exon_seq.is_known; must be
                       convertible with int()
    :param db_name: optional database to switch to first
    :return: the first matching row, or [] when nothing matches or the
             database switch fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []
    qry = "select pepseq_transl_start, pepseq_transl_end from exon_seq "
    qry += "where exon_id = %d " % int(exon_id)
    qry += "and is_known = %d " % int(exon_known)
    # the query was built but never executed before, so the check below
    # raised NameError on the unassigned name 'rows'
    rows = search_db(cursor, qry)
    if not rows:
        return []
    return rows[0]
def exon_id2gene_id(cursor, ensembl_db_name, exon_id, is_known):
    """Return the gene_id of the gene an exon belongs to.

    is_known == 3 looks in usearch_exon, is_known == 2 in sw_exon and any
    other value in gene2exon (matching on both exon_id and is_known).
    Returns "" when no row is found or the first row contains "ERROR";
    the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, ensembl_db_name)
    if is_known == 3:
        # usearch exon
        qry = "select gene_id from usearch_exon where exon_id = %d " % exon_id
    elif is_known == 2:
        # sw_sharp exon
        qry = "select gene_id from sw_exon where exon_id = %d " % exon_id
    else:
        qry = ("select gene_id from gene2exon where "
               "exon_id = %s and is_known = %s ") % (exon_id, is_known)

    rows = search_db(cursor, qry)
    if rows and "ERROR" not in rows[0]:
        return rows[0][0]
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def exon_id2gene_id(cursor, ensembl_db_name, exon_id, is_known):
    """Return the gene_id of the gene an exon belongs to.

    is_known == 3 looks in usearch_exon, is_known == 2 in sw_exon and any
    other value in gene2exon (matching on both exon_id and is_known).
    Returns "" when no row is found or the first row contains "ERROR";
    the query is then repeated once with verbose=True.
    """
    switch_to_db(cursor, ensembl_db_name)
    if is_known == 3:
        # usearch exon
        qry = "select gene_id from usearch_exon where exon_id = %d " % exon_id
    elif is_known == 2:
        # sw_sharp exon
        qry = "select gene_id from sw_exon where exon_id = %d " % exon_id
    else:
        qry = ("select gene_id from gene2exon where "
               "exon_id = %s and is_known = %s ") % (exon_id, is_known)

    rows = search_db(cursor, qry)
    if rows and "ERROR" not in rows[0]:
        return rows[0][0]
    # failed lookup: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def is_coding_exon(cursor, exon_id, is_known, db_name=None):
    """Tell whether gene2exon.is_coding is positive for the given exon.

    Returns False when the database switch fails or no row matches the
    (exon_id, is_known) pair.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select is_coding from gene2exon where exon_id = %d and is_known = %d" % (exon_id, is_known)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0] > 0
    return False
def exon_seq_id2exon_id(cursor, exon_seq_id, db_name=None):
    """Return the (exon_id, is_known) row stored in exon_seq for *exon_seq_id*.

    Returns "" when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return ""
    qry = "select exon_id, is_known from exon_seq where exon_seq_id = %d " % int(exon_seq_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0]
    return ""
def is_coding_exon(cursor, exon_id, is_known, db_name=None):
    """Tell whether gene2exon.is_coding is positive for the given exon.

    Returns False when the database switch fails or no row matches the
    (exon_id, is_known) pair.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select is_coding from gene2exon where exon_id = %d and is_known = %d" % (exon_id, is_known)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0] > 0
    return False
def get_description(cursor, gene_id, db_name=None):
    """Return the description column of the gene table for *gene_id*.

    Returns "" when no row matches and False when switching to *db_name*
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select description from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    return rows[0][0]
def get_description(cursor, gene_id, db_name=None):
    """Return the description column of the gene table for *gene_id*.

    Returns "" when no row matches and False when switching to *db_name*
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select description from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    return rows[0][0]
def exon_seq_id2exon_id(cursor, exon_seq_id, db_name=None):
    """Return the (exon_id, is_known) row stored in exon_seq for *exon_seq_id*.

    Returns "" when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return ""
    qry = "select exon_id, is_known from exon_seq where exon_seq_id = %d " % int(exon_seq_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0]
    return ""
def get_sw_seq_id(cursor, exon_id, db_name=None):
    """Return the exon_seq_id (int) stored in sw_exon for *exon_id*.

    Returns -1 when the database switch fails, no row matches, or the
    stored value is empty/zero.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return -1
    qry = ("select exon_seq_id "
           " from sw_exon where exon_id = %d") % exon_id
    rows = search_db(cursor, qry)
    seq_id = rows[0][0] if rows else None
    if not seq_id:
        return -1
    return int(seq_id)
def get_sw_seq_id(cursor, exon_id, db_name=None):
    """Return the exon_seq_id (int) stored in sw_exon for *exon_id*.

    Returns -1 when the database switch fails, no row matches, or the
    stored value is empty/zero.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return -1
    qry = ("select exon_seq_id "
           " from sw_exon where exon_id = %d") % exon_id
    rows = search_db(cursor, qry)
    seq_id = rows[0][0] if rows else None
    if not seq_id:
        return -1
    return int(seq_id)
def get_status(cursor, exon_id, db_name=None):
    """Return the status column of the gene table row selected by *exon_id*.

    NOTE(review): the parameter is named exon_id, but the query matches it
    against gene.gene_id -- confirm what callers pass in.

    Returns False when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select status from gene where gene_id = %d " % int(exon_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]
    return False
def get_status(cursor, exon_id, db_name=None):
    """Return the status column of the gene table row selected by *exon_id*.

    NOTE(review): the parameter is named exon_id, but the query matches it
    against gene.gene_id -- confirm what callers pass in.

    Returns False when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select status from gene where gene_id = %d " % int(exon_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]
    return False
def get_gene_biotype(cursor, gene_id, db_name=None):
    """Return the biotype column of the gene table for *gene_id*.

    Returns "" when no row matches and False when switching to *db_name*
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select biotype from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]
    return ""
def get_gene_biotype(cursor, gene_id, db_name=None):
    """Return the biotype column of the gene table for *gene_id*.

    Returns "" when no row matches and False when switching to *db_name*
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select biotype from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0]
    return ""
def is_coding(cursor, gene_id, db_name=None):
    """Tell whether gene.is_coding is positive for *gene_id*.

    Returns False when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select is_coding from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0] > 0
    return False
def trivial2scientific(cursor, trivial):
    """Translate a trivial species name into a lower-case, underscored
    scientific name.

    The tax id is read from the names table (name_class 'trivial') and
    turned into a name with taxid2name(); spaces become underscores and a
    "_familiaris" part is removed.

    :param cursor: database cursor
    :param trivial: trivial species name
    :return: the scientific name, or "" when the lookup yields no rows, the
             first row contains 'ERROR', or the tax id is not an integer
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select tax_id from names where "
    qry += "name_txt = '%s' and " % trivial
    qry += "name_class = 'trivial'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        tax_id = int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only the conversion errors are caught, so that e.g.
        # KeyboardInterrupt is no longer swallowed
        return ""
    sciname = taxid2name(cursor, tax_id).lower().replace(" ", "_")
    # canis_lupus_familiaris - don't know what to do with it
    return sciname.replace("_familiaris", "")
def trivial2scientific(cursor, trivial):
    """Translate a trivial species name into a lower-case, underscored
    scientific name.

    The tax id is read from the names table (name_class 'trivial') and
    turned into a name with taxid2name(); spaces become underscores and a
    "_familiaris" part is removed.

    :param cursor: database cursor
    :param trivial: trivial species name
    :return: the scientific name, or "" when the lookup yields no rows, the
             first row contains 'ERROR', or the tax id is not an integer
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select tax_id from names where "
    qry += "name_txt = '%s' and " % trivial
    qry += "name_class = 'trivial'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # rerun verbosely for diagnostics
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        tax_id = int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only the conversion errors are caught, so that e.g.
        # KeyboardInterrupt is no longer swallowed
        return ""
    sciname = taxid2name(cursor, tax_id).lower().replace(" ", "_")
    # canis_lupus_familiaris - don't know what to do with it
    return sciname.replace("_familiaris", "")
def is_coding(cursor, gene_id, db_name=None):
    """Tell whether gene.is_coding is positive for *gene_id*.

    Returns False when the database switch fails or no row matches.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select is_coding from gene where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)
    if rows:
        return rows[0][0] > 0
    return False
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Return the gene ids of the orthologues of a gene member in *species*.

    ortho_type is matched against homology.description; the original notes
    list 'ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many',
    'possible_ortholog' and 'apparent_ortholog_one2one' as values.

    For every homology of the requested type the other members are looked
    up, those belonging to the genome of *species* are kept, and their
    stable ids are converted with stable2gene() in the database
    ensembl_db_name[species].  Returns [] when nothing is found.
    """
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database
    switch_to_db(cursor, get_compara_name(cursor))
    qry = ("select homology.homology_id from homology_member, homology "
           + " where homology_member.gene_member_id =%d " % gene_member_id
           + " and homology.homology_id = homology_member.homology_id "
           + " and homology.description = '%s' " % ortho_type)
    homology_rows = search_db(cursor, qry)
    if not homology_rows:
        return []  # no orthologs here

    stable_ids = []
    for homology_row in homology_rows:
        # for each homology id find the other member ids
        switch_to_db(cursor, get_compara_name(cursor))
        qry = ("select gene_member_id from homology_member "
               + " where homology_id = %d" % int(homology_row[0])
               + " and not gene_member_id = %d" % gene_member_id)
        member_rows = search_db(cursor, qry, verbose=False)
        for member_row in member_rows or ():
            # keep the member only if it belongs to the requested genome
            qry = ("select stable_id from gene_member "
                   + " where gene_member_id = %d " % member_row[0]
                   + " and genome_db_id = %d " % genome_db_id)
            stable_rows = search_db(cursor, qry, verbose=False)
            if stable_rows:
                stable_ids.append(stable_rows[0][0])

    if not stable_ids:
        return stable_ids
    switch_to_db(cursor, ensembl_db_name[species])
    return [stable2gene(cursor, stable_id) for stable_id in stable_ids]
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Return the gene ids of the orthologues of a gene member in *species*.

    ortho_type is matched against homology.description; the original notes
    list 'ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many',
    'possible_ortholog' and 'apparent_ortholog_one2one' as values.

    For every homology of the requested type the other members are looked
    up, those belonging to the genome of *species* are kept, and their
    stable ids are converted with stable2gene() in the database
    ensembl_db_name[species].  Returns [] when nothing is found.
    """
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database
    switch_to_db(cursor, get_compara_name(cursor))
    qry = ("select homology.homology_id from homology_member, homology "
           + " where homology_member.gene_member_id =%d " % gene_member_id
           + " and homology.homology_id = homology_member.homology_id "
           + " and homology.description = '%s' " % ortho_type)
    homology_rows = search_db(cursor, qry)
    if not homology_rows:
        return []  # no orthologs here

    stable_ids = []
    for homology_row in homology_rows:
        # for each homology id find the other member ids
        switch_to_db(cursor, get_compara_name(cursor))
        qry = ("select gene_member_id from homology_member "
               + " where homology_id = %d" % int(homology_row[0])
               + " and not gene_member_id = %d" % gene_member_id)
        member_rows = search_db(cursor, qry, verbose=False)
        for member_row in member_rows or ():
            # keep the member only if it belongs to the requested genome
            qry = ("select stable_id from gene_member "
                   + " where gene_member_id = %d " % member_row[0]
                   + " and genome_db_id = %d " % genome_db_id)
            stable_rows = search_db(cursor, qry, verbose=False)
            if stable_rows:
                stable_ids.append(stable_rows[0][0])

    if not stable_ids:
        return stable_ids
    switch_to_db(cursor, ensembl_db_name[species])
    return [stable2gene(cursor, stable_id) for stable_id in stable_ids]
def get_logic_name(cursor, analysis_id, db_name=None):
    """Return the logic_name of the analysis table row *analysis_id*.

    A negative analysis_id yields '' without touching the database.
    Returns '' when no row matches and False when switching to *db_name*
    fails.
    """
    if analysis_id < 0:
        return ''
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "SELECT logic_name FROM analysis WHERE analysis_id = %d" % analysis_id
    rows = search_db(cursor, qry)
    return rows[0][0] if rows else ''
def gene2canon_transl(cursor, gene_id, db_name=None,):
    """Return the translation_id of the translation that belongs to the
    canonical transcript of *gene_id*.

    Returns "" when no row matches (the query is then repeated once with
    verbose=True) and False when switching to *db_name* fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = ("select translation.translation_id from translation, gene "
           " where gene.canonical_transcript_id = translation.transcript_id "
           " and gene.gene_id = %d ") % gene_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return rows[0][0]
    # nothing found: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def get_logic_name(cursor, analysis_id, db_name=None):
    """Return the logic_name of the analysis table row *analysis_id*.

    A negative analysis_id yields "" without touching the database.
    Returns "" when no row matches and False when switching to *db_name*
    fails.
    """
    if analysis_id < 0:
        return ""
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "SELECT logic_name FROM analysis WHERE analysis_id = %d" % analysis_id
    rows = search_db(cursor, qry)
    return rows[0][0] if rows else ""
def gene2stable_canon_transl(cursor, gene_id, db_name=None):
    """Return the stable_id of the translation that belongs to the
    canonical transcript of *gene_id*.

    Returns "" when no row matches (the query is then repeated once with
    verbose=True) and False when switching to *db_name* fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = ("select translation.stable_id from translation, gene "
           " where gene.canonical_transcript_id = translation.transcript_id "
           " and gene.gene_id = %d ") % gene_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return rows[0][0]
    # nothing found: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return ""
def stable2gene(cursor, stable_id=None, db_name=None):
    """Return the gene_id (int) of the gene table row with *stable_id*.

    An empty stable_id yields 0 without touching the database.  Returns 0
    when no row matches (the query is then repeated once with verbose=True)
    and False when switching to *db_name* fails.
    """
    if not stable_id:
        return 0
    if db_name and not switch_to_db(cursor, db_name):
        return False
    qry = "select gene_id from gene where stable_id='%s'" % stable_id
    rows = search_db(cursor, qry, verbose=False)
    if rows:
        return int(rows[0][0])
    # nothing found: rerun verbosely for diagnostics
    search_db(cursor, qry, verbose=True)
    return 0