예제 #1
0
    def build(self, cursor):
        """Assemble the taxonomy tree spanning the registered leafs.

        Every leaf first gets its tax_id via species2taxid() (compara
        database) and is indexed in self.node under that id.  Ancestors are
        then looked up in the NCBI taxonomy database and added to self.node
        until an already known ancestor or the root is reached.  Afterwards
        the tree is post-processed: self.root.__cleanup__() may hand back a
        replacement root, childless non-leaf nodes are removed from
        self.node, and __set_parent_ids__() is run from the root.

        :param cursor: database cursor; it is switched between databases
        """
        switch_to_db(cursor, get_compara_name(cursor))
        for leaf in self.leafs:
            leaf.tax_id = species2taxid(cursor, leaf.name)
            leaf.is_leaf = True
            self.node[leaf.tax_id] = leaf

        # build the tree using ncbi_tax.nodes
        # fill in the names using ncbi_tax.names
        switch_to_db(cursor, get_ncbi_tax_name(cursor))
        for leaf in self.leafs:
            parent_id = taxid2parentid(cursor, leaf.tax_id)
            leaf.parent_id = parent_id
            current_id = leaf.tax_id
            # move to the root
            while current_id:
                current_node = self.node[current_id]
                # NOTE(review): on the first pass parent_id already holds the
                # leaf's parent, so this lookup yields the grandparent and
                # the leaf is attached one level higher -- confirm intended.
                parent_id = taxid2parentid(cursor, parent_id)
                if not parent_id or current_id == parent_id:
                    # no further ancestor: the current node is the root
                    current_node.is_root = True
                    self.root = current_node
                    current_id = None
                elif parent_id in self.node:
                    # the parent was created while processing another leaf;
                    # attach and stop climbing
                    self.node[parent_id].children.append(current_node)
                    current_id = None
                else:
                    # make a new node for the parent and keep climbing
                    parent_node = Node(taxid2name(cursor, parent_id))
                    parent_node.tax_id = parent_id
                    # grampa:
                    parent_node.parent_id = taxid2parentid(cursor, parent_id)
                    parent_node.children.append(current_node)
                    self.node[parent_id] = parent_node
                    current_id = parent_id

        # shortcircuit nodes with a single child
        new_root = self.root.__cleanup__()
        if new_root:
            new_root.is_root = True
            self.root = new_root

        # collect first, delete afterwards: the dict must not change size
        # while it is being iterated
        del_ids = [node_id for node_id, node in self.node.items()
                   if not node.is_leaf and not node.children]
        for node_id in del_ids:
            del self.node[node_id]

        self.__set_parent_ids__(self.root)
예제 #2
0
def find_mammals(cursor, trivial_name_list):
    """Return the entries of trivial_name_list that are mammals.

    For each trivial name the taxonomy is climbed from the parent of the
    species upwards.  A scientific name containing 'mammal' marks the
    species as a mammal; reaching a name containing 'vertebrat' first, or
    running out of ancestors (parent id missing or <= 1), means it is not.

    Names that trivial2taxid() cannot resolve are skipped instead of
    crashing in the parent lookup.

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list of the trivial names classified as mammals, input order
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        tax_id = trivial2taxid(cursor, trivial_name)
        if not tax_id:
            # trivial2taxid returns "" for unknown names; int("") in
            # taxid2parentid would raise ValueError
            continue

        # start from the parent of the species itself
        tax_id = taxid2parentid(cursor, tax_id)
        is_mammal = False
        while tax_id:
            # the taxid2parentid call above leaves the cursor on the
            # NCBI taxonomy database, where the names table lives
            qry = "select name_txt from names where tax_id= %d " % int(tax_id)
            qry += " and name_class = 'scientific name'"
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                sci_name = rows[0][0].lower()
                if 'mammal' in sci_name:
                    is_mammal = True
                    break
                if 'vertebrat' in sci_name:
                    # if the thing was a mammal, we would have found it by now
                    break

            parent_id = taxid2parentid(cursor, tax_id)
            tax_id = parent_id if parent_id and parent_id > 1 else None

        if is_mammal:
            mammals.append(trivial_name)

    return mammals
예제 #3
0
def find_mammals(cursor, trivial_name_list):
    """Return the entries of trivial_name_list that are mammals.

    For each trivial name the taxonomy is climbed from the parent of the
    species upwards.  A scientific name containing 'mammal' marks the
    species as a mammal; reaching a name containing 'vertebrat' first, or
    running out of ancestors (parent id missing or <= 1), means it is not.

    Names that trivial2taxid() cannot resolve are skipped instead of
    crashing in the parent lookup.

    :param cursor: database cursor
    :param trivial_name_list: iterable of trivial species names
    :return: list of the trivial names classified as mammals, input order
    """
    mammals = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        tax_id = trivial2taxid(cursor, trivial_name)
        if not tax_id:
            # trivial2taxid returns "" for unknown names; int("") in
            # taxid2parentid would raise ValueError
            continue

        # start from the parent of the species itself
        tax_id = taxid2parentid(cursor, tax_id)
        is_mammal = False
        while tax_id:
            # the taxid2parentid call above leaves the cursor on the
            # NCBI taxonomy database, where the names table lives
            qry = "select name_txt from names where tax_id= %d " % int(tax_id)
            qry += " and name_class = 'scientific name'"
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                sci_name = rows[0][0].lower()
                if 'mammal' in sci_name:
                    is_mammal = True
                    break
                if 'vertebrat' in sci_name:
                    # if the thing was a mammal, we would have found it by now
                    break

            parent_id = taxid2parentid(cursor, tax_id)
            tax_id = parent_id if parent_id and parent_id > 1 else None

        if is_mammal:
            mammals.append(trivial_name)

    return mammals
예제 #4
0
    def build(self, cursor):
        """Assemble the taxonomy tree spanning the registered leafs.

        Every leaf first gets its tax_id via species2taxid() (compara
        database) and is indexed in self.node under that id.  Ancestors are
        then looked up in the NCBI taxonomy database and added to self.node
        until an already known ancestor or the root is reached.  Afterwards
        the tree is post-processed: self.root.__cleanup__() may hand back a
        replacement root, childless non-leaf nodes are removed from
        self.node, and __set_parent_ids__() is run from the root.

        :param cursor: database cursor; it is switched between databases
        """
        switch_to_db(cursor, get_compara_name(cursor))
        for leaf in self.leafs:
            leaf.tax_id = species2taxid(cursor, leaf.name)
            leaf.is_leaf = True
            self.node[leaf.tax_id] = leaf

        # build the tree using ncbi_tax.nodes
        # fill in the names using ncbi_tax.names
        switch_to_db(cursor, get_ncbi_tax_name(cursor))
        for leaf in self.leafs:
            parent_id = taxid2parentid(cursor, leaf.tax_id)
            leaf.parent_id = parent_id
            current_id = leaf.tax_id
            # move to the root
            while current_id:
                current_node = self.node[current_id]
                # NOTE(review): on the first pass parent_id already holds the
                # leaf's parent, so this lookup yields the grandparent and
                # the leaf is attached one level higher -- confirm intended.
                parent_id = taxid2parentid(cursor, parent_id)
                if not parent_id or current_id == parent_id:
                    # no further ancestor: the current node is the root
                    current_node.is_root = True
                    self.root = current_node
                    current_id = None
                elif parent_id in self.node:
                    # the parent was created while processing another leaf;
                    # attach and stop climbing
                    self.node[parent_id].children.append(current_node)
                    current_id = None
                else:
                    # make a new node for the parent and keep climbing
                    parent_node = Node(taxid2name(cursor, parent_id))
                    parent_node.tax_id = parent_id
                    # grampa:
                    parent_node.parent_id = taxid2parentid(cursor, parent_id)
                    parent_node.children.append(current_node)
                    self.node[parent_id] = parent_node
                    current_id = parent_id

        # shortcircuit nodes with a single child
        new_root = self.root.__cleanup__()
        if new_root:
            new_root.is_root = True
            self.root = new_root

        # collect first, delete afterwards: the dict must not change size
        # while it is being iterated
        del_ids = [node_id for node_id, node in self.node.items()
                   if not node.is_leaf and not node.children]
        for node_id in del_ids:
            del self.node[node_id]

        self.__set_parent_ids__(self.root)
예제 #5
0
 def add(self, cursor, name):
     """Register a new leaf node for the species called `name`.

     The tax_id is obtained with species2taxid() after switching the
     cursor to the compara database.  The node is flagged as a leaf,
     indexed in self.node under its tax_id and appended to self.leafs.
     """
     new_leaf = Node(name)
     compara_db = get_compara_name(cursor)
     switch_to_db(cursor, compara_db)
     new_leaf.tax_id = species2taxid(cursor, new_leaf.name)
     new_leaf.is_leaf = True
     self.node[new_leaf.tax_id] = new_leaf
     self.leafs.append(new_leaf)
예제 #6
0
 def add(self, cursor, name):
     """Register a new leaf node for the species called `name`.

     The tax_id is obtained with species2taxid() after switching the
     cursor to the compara database.  The node is flagged as a leaf,
     indexed in self.node under its tax_id and appended to self.leafs.
     """
     new_leaf = Node(name)
     compara_db = get_compara_name(cursor)
     switch_to_db(cursor, compara_db)
     new_leaf.tax_id = species2taxid(cursor, new_leaf.name)
     new_leaf.is_leaf = True
     self.node[new_leaf.tax_id] = new_leaf
     self.leafs.append(new_leaf)
예제 #7
0
def taxid2name(cursor, tax_id):
    """Look up the scientific name stored for an NCBI taxonomy id.

    Switches the cursor to the NCBI taxonomy database and queries the
    names table.  Returns the first name_txt found, or "" when the query
    yields nothing (the query is then repeated with verbose=True).
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    query = ("select name_txt from names where tax_id= %d " % int(tax_id)
             + " and name_class = 'scientific name'")
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    # nothing found: run the query again verbosely for diagnostics
    search_db(cursor, query, verbose=True)
    return ""
예제 #8
0
def trivial2taxid(cursor, trivial_name):
    """Map a trivial species name to its NCBI taxonomy id.

    Returns the tax_id as an int.  When no row comes back, or the first
    row contains 'ERROR', the query is repeated with verbose=True and ""
    is returned.
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    # NOTE(review): trivial_name is interpolated into the SQL unescaped
    query = ("select tax_id from names where name_txt= '%s' " % trivial_name
             + " and name_class = 'trivial'")
    hits = search_db(cursor, query)
    if hits and 'ERROR' not in hits[0]:
        return int(hits[0][0])
    search_db(cursor, query, verbose=True)
    return ""
예제 #9
0
def taxid2trivial(cursor, tax_id):
    """Look up the trivial name stored for an NCBI taxonomy id.

    Returns the first name_txt of class 'trivial'.  When no row comes
    back, or the first row contains 'ERROR', the query is repeated with
    verbose=True and "" is returned.
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    query = ("select name_txt from names where tax_id= %d " % int(tax_id)
             + " and name_class = 'trivial'")
    hits = search_db(cursor, query)
    if hits and 'ERROR' not in hits[0]:
        return hits[0][0]
    search_db(cursor, query, verbose=True)
    return ""
예제 #10
0
def trivial2taxid(cursor, trivial_name):
    """Map a trivial species name to its NCBI taxonomy id.

    Returns the tax_id as an int.  When no row comes back, or the first
    row contains 'ERROR', the query is repeated with verbose=True and ""
    is returned.
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    # NOTE(review): trivial_name is interpolated into the SQL unescaped
    query = ("select tax_id from names where name_txt= '%s' " % trivial_name
             + " and name_class = 'trivial'")
    hits = search_db(cursor, query)
    if hits and 'ERROR' not in hits[0]:
        return int(hits[0][0])
    search_db(cursor, query, verbose=True)
    return ""
예제 #11
0
def taxid2name(cursor, tax_id):
    """Look up the scientific name stored for an NCBI taxonomy id.

    Switches the cursor to the NCBI taxonomy database and queries the
    names table.  Returns the first name_txt found, or "" when the query
    yields nothing (the query is then repeated with verbose=True).
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    query = ("select name_txt from names where tax_id= %d " % int(tax_id)
             + " and name_class = 'scientific name'")
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    # nothing found: run the query again verbosely for diagnostics
    search_db(cursor, query, verbose=True)
    return ""
예제 #12
0
def taxid2trivial(cursor, tax_id):
    """Look up the trivial name stored for an NCBI taxonomy id.

    Returns the first name_txt of class 'trivial'.  When no row comes
    back, or the first row contains 'ERROR', the query is repeated with
    verbose=True and "" is returned.
    """
    ncbi_db = get_ncbi_tax_name(cursor)
    switch_to_db(cursor, ncbi_db)
    query = ("select name_txt from names where tax_id= %d " % int(tax_id)
             + " and name_class = 'trivial'")
    hits = search_db(cursor, query)
    if hits and 'ERROR' not in hits[0]:
        return hits[0][0]
    search_db(cursor, query, verbose=True)
    return ""
예제 #13
0
def species2genome_db_id(cursor, species):
    """Return the genome_db_id registered for `species` in compara.

    Queries the genome_db table of the compara database.  Returns the id
    as an int, or 0 when nothing is found (the query is then repeated
    with verbose=True).
    """
    compara_db = get_compara_name(cursor)
    switch_to_db(cursor, compara_db)
    # NOTE(review): species is interpolated into the SQL unescaped
    query = "select genome_db_id from genome_db where name = '%s'" % species
    hits = search_db(cursor, query)
    if hits:
        return int(hits[0][0])
    search_db(cursor, query, verbose=True)
    return 0
예제 #14
0
def genome_db_id2species(cursor, genome_db_id):
    """Return the species name stored for a compara genome_db_id.

    Queries the genome_db table of the compara database.  Returns the
    name, or "" when nothing is found (the query is then repeated with
    verbose=True).
    """
    compara_db = get_compara_name(cursor)
    switch_to_db(cursor, compara_db)
    query = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    search_db(cursor, query, verbose=True)
    return ""
예제 #15
0
def taxid2parentid(cursor, tax_id):
    """Return the parent tax_id of `tax_id` from the NCBI nodes table.

    :param cursor: database cursor; switched to the NCBI taxonomy database
    :param tax_id: taxonomy id, anything int() accepts
    :return: the parent id as an int, or "" when the query yields nothing
             (it is then repeated with verbose=True) or the stored value
             cannot be converted to an int
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        # repeat verbosely for diagnostics; the result itself is not needed
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""
예제 #16
0
def taxid2parentid(cursor, tax_id):
    """Return the parent tax_id of `tax_id` from the NCBI nodes table.

    :param cursor: database cursor; switched to the NCBI taxonomy database
    :param tax_id: taxonomy id, anything int() accepts
    :return: the parent id as an int, or "" when the query yields nothing
             (it is then repeated with verbose=True) or the stored value
             cannot be converted to an int
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select parent_tax_id from nodes where tax_id= %d " % int(tax_id)
    rows = search_db(cursor, qry)
    if not rows:
        # repeat verbosely for diagnostics; the result itself is not needed
        search_db(cursor, qry, verbose=True)
        return ""
    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""
예제 #17
0
def genome_db_id2species(cursor, genome_db_id):
    """Return the species name stored for a compara genome_db_id.

    Queries the genome_db table of the compara database.  Returns the
    name, or "" when nothing is found (the query is then repeated with
    verbose=True).
    """
    compara_db = get_compara_name(cursor)
    switch_to_db(cursor, compara_db)
    query = "select name from genome_db where genome_db_id = %d" % int(genome_db_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    search_db(cursor, query, verbose=True)
    return ""
예제 #18
0
def species2genome_db_id(cursor, species):
    """Return the genome_db_id registered for `species` in compara.

    Queries the genome_db table of the compara database.  Returns the id
    as an int, or 0 when nothing is found (the query is then repeated
    with verbose=True).
    """
    compara_db = get_compara_name(cursor)
    switch_to_db(cursor, compara_db)
    # NOTE(review): species is interpolated into the SQL unescaped
    query = "select genome_db_id from genome_db where name = '%s'" % species
    hits = search_db(cursor, query)
    if hits:
        return int(hits[0][0])
    search_db(cursor, query, verbose=True)
    return 0
예제 #19
0
def species2taxid(cursor, species):
    """Return the NCBI taxon id stored for `species` in compara.

    :param cursor: database cursor; switched to the compara database
    :param species: species name as stored in genome_db.name
    :return: the taxon id as an int, or "" when the species is not found
             (the query is then repeated with verbose=True) or the stored
             value cannot be converted to an int
    """
    switch_to_db(cursor, get_compara_name(cursor))
    # NOTE(review): species is interpolated into the SQL unescaped
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""
예제 #20
0
def species2taxid(cursor, species):
    """Return the NCBI taxon id stored for `species` in compara.

    :param cursor: database cursor; switched to the compara database
    :param species: species name as stored in genome_db.name
    :return: the taxon id as an int, or "" when the species is not found
             (the query is then repeated with verbose=True) or the stored
             value cannot be converted to an int
    """
    switch_to_db(cursor, get_compara_name(cursor))
    # NOTE(review): species is interpolated into the SQL unescaped
    qry = "select taxon_id from genome_db where name = '%s'" % species
    rows = search_db(cursor, qry)
    if not rows:
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        return int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""
예제 #21
0
def get_canonical_coding_exons(cursor, gene_id, db_name=None):
    """Return the exons of a gene lying within its canonical translation.

    The exons flagged both is_coding and is_canonical are sorted by
    start_in_gene; those from the exon where canonical translation starts
    through the exon where it ends (both inclusive) are returned.

    :param cursor: database cursor
    :param gene_id: id of the gene
    :param db_name: database to switch to first; None keeps the current one
    :return: list of exons, or [] when the database switch fails or any
             piece of required information is missing
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []

    all_exons = gene2exon_list(cursor, gene_id)
    if not all_exons:
        return []

    # an exon can be coding, but not in the canonical version of the
    # transcript, hence both flags are required.  A list comprehension
    # (rather than filter) guarantees a sortable list.
    exons = [exon for exon in all_exons
             if exon.is_coding and exon.is_canonical]
    if not exons:
        return []
    exons.sort(key=lambda exon: exon.start_in_gene)

    # is there info about the beginning and the end of canonical translation?
    canonical_transcript_id = get_canonical_transcript_id(cursor, gene_id, db_name=None)
    if not canonical_transcript_id:
        return []
    ret = get_canonical_coordinates(cursor, canonical_transcript_id)
    if not ret or len(ret) != 4:
        return []
    # ret is [start_in_exon, start_exon_id, end_in_exon, end_exon_id]
    canonical_start_exon_id = ret[1]
    canonical_end_exon_id = ret[3]
    if canonical_start_exon_id is None or canonical_end_exon_id is None:
        return []

    # translation that starts and ends in one exon has a single boundary
    # exon; a plain toggle would never switch off again
    single_exon = canonical_start_exon_id == canonical_end_exon_id

    # keep the exons that are within the start and end bracket
    canonical_exons = []
    reading = False
    for exon in exons:
        if exon.exon_id in (canonical_start_exon_id, canonical_end_exon_id):
            canonical_exons.append(exon)
            if single_exon:
                break
            reading = not reading
        elif reading:
            canonical_exons.append(exon)

    return canonical_exons
예제 #22
0
def get_common_name(cursor, species):
    """Return the GenBank common name of `species`.

    The species is first translated to a taxon id with species2taxid();
    the name is then read from the names table of the NCBI taxonomy
    database (name_class 'genbank common name').

    :param cursor: database cursor
    :param species: species name as known to the compara database
    :return: the common name, or "" when the species or its common name
             is not found or the query reports an error
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    if not tax_id:
        # species2taxid returns "" for unknown species, which the %d
        # format below would reject with a TypeError
        return ""

    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select name_txt from names where "
    qry += "tax_id = %d and " % tax_id
    qry += "name_class = 'genbank common name'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # repeat verbosely so the error gets reported
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
예제 #23
0
def get_common_name(cursor, species):
    """Return the GenBank common name of `species`.

    The species is first translated to a taxon id with species2taxid();
    the name is then read from the names table of the NCBI taxonomy
    database (name_class 'genbank common name').

    :param cursor: database cursor
    :param species: species name as known to the compara database
    :return: the common name, or "" when the species or its common name
             is not found or the query reports an error
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    if not tax_id:
        # species2taxid returns "" for unknown species, which the %d
        # format below would reject with a TypeError
        return ""

    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    qry = "select name_txt from names where "
    qry += "tax_id = %d and " % tax_id
    qry += "name_class = 'genbank common name'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # repeat verbosely so the error gets reported
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
예제 #24
0
def get_exon_pepseq(cursor, exon, db_name=None, verbose=False):
    """Fetch the protein sequence stored for an exon in exon_seq.

    The row is selected by exon.exon_seq_id when that is set, otherwise
    by the (exon.exon_id, exon.is_known) pair.  Returns the sequence, ""
    when the row is missing or holds NULL, and False when the switch to
    db_name fails.  With verbose set, a failed lookup is repeated with
    verbose=True and its result printed.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    if exon.exon_seq_id:
        source = " from exon_seq where exon_seq_id = %d" % exon.exon_seq_id
        query = "select protein_seq " + source
    else:
        source = " from exon_seq where exon_id = %d and is_known = %d" % (exon.exon_id, exon.is_known)
        query = "select protein_seq  " + source

    hits = search_db(cursor, query)
    if hits:
        pepseq = hits[0][0]
        return "" if pepseq is None else pepseq

    if verbose:
        print(search_db(cursor, query, verbose=True))
    return ""
예제 #25
0
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):
    """Load all exons recorded for a gene in the gene2exon table.

    :param cursor: database cursor
    :param gene_id: id of the gene, anything int() accepts
    :param db_name: database to switch to first; None keeps the current one
    :param verbose: when no rows are found, print the current database
                    name and the result of a verbose re-run of the query
    :return: list of Exon objects (rows that Exon.load_from_gene2exon
             rejects are skipped), [] when the gene has no rows, or
             False when the switch to db_name fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    # int() as in the sibling lookups, so numeric strings are accepted too
    qry = "select * from gene2exon where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)

    if not rows:
        if verbose:
            # diagnostics only: the extra queries are skipped otherwise
            db_rows = search_db(cursor, "select database()")
            print("database  %s" % (db_rows[0][0],))
            print(search_db(cursor, qry, verbose=True))
        return []

    exons = []
    for row in rows:
        exon = Exon()
        if exon.load_from_gene2exon(row):
            exons.append(exon)

    return exons
예제 #26
0
def get_exon_pepseq(cursor, exon, db_name=None, verbose=False):
    """Fetch the protein sequence stored for an exon in exon_seq.

    The row is selected by exon.exon_seq_id when that is set, otherwise
    by the (exon.exon_id, exon.is_known) pair.  Returns the sequence, ""
    when the row is missing or holds NULL, and False when the switch to
    db_name fails.  With verbose set, a failed lookup is repeated with
    verbose=True and its result printed.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    if exon.exon_seq_id:
        source = " from exon_seq where exon_seq_id = %d" % exon.exon_seq_id
        query = "select protein_seq " + source
    else:
        source = " from exon_seq where exon_id = %d and is_known = %d" % (exon.exon_id, exon.is_known)
        query = "select protein_seq  " + source

    hits = search_db(cursor, query)
    if hits:
        pepseq = hits[0][0]
        return "" if pepseq is None else pepseq

    if verbose:
        print(search_db(cursor, query, verbose=True))
    return ""
예제 #27
0
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Load a single exon from the table selected by is_known.

    is_known == 2 reads sw_exon, is_known == 3 reads usearch_exon, and
    anything else reads gene2exon (additionally filtered on is_known
    when that is truthy).  Returns an Exon; it is left as freshly
    constructed when the switch to db_name fails or no row is found.
    """
    exon = Exon()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    novel_table = None
    if is_known == 2:
        # sw# exon
        novel_table = "sw_exon"
        query = "select * from sw_exon where exon_id = %d" % exon_id
    elif is_known == 3:
        # usearch exon
        novel_table = "usearch_exon"
        query = "select * from usearch_exon where exon_id = %d" % exon_id
    else:
        query = "select * from gene2exon where exon_id = %d" % exon_id
        if is_known:
            query += " and is_known = %s " % is_known

    hits = search_db(cursor, query, verbose=False)
    if hits:
        if novel_table:
            exon.load_from_novel_exon(hits[0], novel_table)
        else:
            exon.load_from_gene2exon(hits[0])
    return exon
예제 #28
0
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False):
    """Load all exons recorded for a gene in the gene2exon table.

    :param cursor: database cursor
    :param gene_id: id of the gene, anything int() accepts
    :param db_name: database to switch to first; None keeps the current one
    :param verbose: when no rows are found, print the current database
                    name and the result of a verbose re-run of the query
    :return: list of Exon objects (rows that Exon.load_from_gene2exon
             rejects are skipped), [] when the gene has no rows, or
             False when the switch to db_name fails
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    # int() as in the sibling lookups, so numeric strings are accepted too
    qry = "select * from gene2exon where gene_id = %d " % int(gene_id)
    rows = search_db(cursor, qry)

    if not rows:
        if verbose:
            # diagnostics only: the extra queries are skipped otherwise
            db_rows = search_db(cursor, "select database()")
            print("database  %s" % (db_rows[0][0],))
            print(search_db(cursor, qry, verbose=True))
        return []

    exons = []
    for row in rows:
        exon = Exon()
        if exon.load_from_gene2exon(row):
            exons.append(exon)

    return exons
예제 #29
0
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Fetch one exon_seq row by its exon_seq_id.

    Returns [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq] with NULL
    sequence and flank fields replaced by "".  Returns [] when no row is
    found and False when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    columns = "select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
    columns += " left_flank, right_flank, dna_seq  "
    query = columns + " from exon_seq where exon_seq_id = %d" % exon_seq_id
    hits = search_db(cursor, query)
    if not hits:
        return []

    seq_id, pepseq, transl_start, transl_end, lflank, rflank, dna = hits[0]

    def blank_if_null(value):
        # text columns come back as None for SQL NULL
        return "" if value is None else value

    return [seq_id, blank_if_null(pepseq), transl_start, transl_end,
            blank_if_null(lflank), blank_if_null(rflank), blank_if_null(dna)]
예제 #30
0
def get_exon_seq_by_db_id(cursor, exon_seq_id, db_name=None):
    """Fetch one exon_seq row by its exon_seq_id.

    Returns [exon_seq_id, protein_seq, pepseq_transl_start,
    pepseq_transl_end, left_flank, right_flank, dna_seq] with NULL
    sequence and flank fields replaced by "".  Returns [] when no row is
    found and False when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    columns = "select exon_seq_id, protein_seq, pepseq_transl_start, pepseq_transl_end, "
    columns += " left_flank, right_flank, dna_seq  "
    query = columns + " from exon_seq where exon_seq_id = %d" % exon_seq_id
    hits = search_db(cursor, query)
    if not hits:
        return []

    seq_id, pepseq, transl_start, transl_end, lflank, rflank, dna = hits[0]

    def blank_if_null(value):
        # text columns come back as None for SQL NULL
        return "" if value is None else value

    return [seq_id, blank_if_null(pepseq), transl_start, transl_end,
            blank_if_null(lflank), blank_if_null(rflank), blank_if_null(dna)]
예제 #31
0
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Load a single exon from the table selected by is_known.

    is_known == 2 reads sw_exon, is_known == 3 reads usearch_exon, and
    anything else reads gene2exon (additionally filtered on is_known
    when that is truthy).  Returns an Exon; it is left as freshly
    constructed when the switch to db_name fails or no row is found.
    """
    exon = Exon()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    novel_table = None
    if is_known == 2:
        # sw# exon
        novel_table = "sw_exon"
        query = "select * from sw_exon where exon_id = %d" % exon_id
    elif is_known == 3:
        # usearch exon
        novel_table = "usearch_exon"
        query = "select * from usearch_exon where exon_id = %d" % exon_id
    else:
        query = "select * from gene2exon where exon_id = %d" % exon_id
        if is_known:
            query += " and is_known = %s " % is_known

    hits = search_db(cursor, query, verbose=False)
    if hits:
        if novel_table:
            exon.load_from_novel_exon(hits[0], novel_table)
        else:
            exon.load_from_gene2exon(hits[0])
    return exon
예제 #32
0
def get_maps(cursor, ensembl_db_name, exon_id, is_known):
    """Load the exon_map rows recorded for an exon.

    The cursor is switched to ensembl_db_name['homo_sapiens'] and
    exon_map is filtered on exon_id and exon_known.  Returns a list of
    Map objects, one per row, or [] when nothing is found or the first
    row contains "ERROR".
    """
    switch_to_db(cursor, ensembl_db_name['homo_sapiens'])
    query = ("select * from exon_map where exon_id = %d " % exon_id
             + " and exon_known = %d " % is_known)

    rows = search_db(cursor, query)
    if not rows or "ERROR" in rows[0]:
        return []

    maps = []
    for row in rows:
        exon_map = Map()
        exon_map.load_from_db(row, cursor)
        maps.append(exon_map)
    return maps
예제 #33
0
def get_pepseq_transl_range(cursor, exon_id, exon_known, db_name=None):
    """Return the peptide translation range stored for an exon.

    :param cursor: database cursor
    :param exon_id: exon id, anything int() accepts
    :param exon_known: value matched against exon_seq.is_known
    :param db_name: database to switch to first; None keeps the current one
    :return: the first row as (pepseq_transl_start, pepseq_transl_end),
             or [] when the database switch fails or no row is found
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []
    qry = "select pepseq_transl_start, pepseq_transl_end from exon_seq "
    qry += "where exon_id = %d " % int(exon_id)
    qry += "and is_known = %d " % int(exon_known)
    # the query was built but never executed, leaving `rows` undefined
    rows = search_db(cursor, qry)
    if not rows:
        return []
    return rows[0]
예제 #34
0
def get_pepseq_transl_range(cursor, exon_id, exon_known, db_name=None):
    """Return the peptide translation range stored for an exon.

    :param cursor: database cursor
    :param exon_id: exon id, anything int() accepts
    :param exon_known: value matched against exon_seq.is_known
    :param db_name: database to switch to first; None keeps the current one
    :return: the first row as (pepseq_transl_start, pepseq_transl_end),
             or [] when the database switch fails or no row is found
    """
    if db_name and not switch_to_db(cursor, db_name):
        return []
    qry = "select pepseq_transl_start, pepseq_transl_end from exon_seq "
    qry += "where exon_id = %d " % int(exon_id)
    qry += "and is_known = %d " % int(exon_known)
    # the query was built but never executed, leaving `rows` undefined
    rows = search_db(cursor, qry)
    if not rows:
        return []
    return rows[0]
예제 #35
0
def exon_id2gene_id(cursor, ensembl_db_name, exon_id, is_known):
    """Return the gene_id that owns an exon.

    The table is picked by is_known: 3 reads usearch_exon, 2 reads
    sw_exon, anything else reads gene2exon (matched on exon_id and
    is_known).  When no row comes back, or the first row contains
    'ERROR', the query is repeated with verbose=True and "" is returned.
    """
    switch_to_db(cursor, ensembl_db_name)

    if is_known == 3:
        # usearch exon
        query = "select gene_id from usearch_exon where " + "exon_id = %d " % exon_id
    elif is_known == 2:
        # sw_sharp exon
        query = "select gene_id from sw_exon where " + "exon_id = %d " % exon_id
    else:
        query = ("select gene_id from gene2exon where "
                 + "exon_id = %s and is_known = %s " % (exon_id, is_known))

    hits = search_db(cursor, query)
    if hits and 'ERROR' not in hits[0]:
        return hits[0][0]

    search_db(cursor, query, verbose=True)
    return ""
예제 #36
0
def exon_id2gene_id(cursor, ensembl_db_name, exon_id, is_known):
    """Return the gene_id that owns an exon.

    The table is picked by is_known: 3 reads usearch_exon, 2 reads
    sw_exon, anything else reads gene2exon (matched on exon_id and
    is_known).  When no row comes back, or the first row contains
    "ERROR", the query is repeated with verbose=True and "" is returned.
    """
    switch_to_db(cursor, ensembl_db_name)

    if is_known == 3:
        # usearch exon
        query = "select gene_id from usearch_exon where " + "exon_id = %d " % exon_id
    elif is_known == 2:
        # sw_sharp exon
        query = "select gene_id from sw_exon where " + "exon_id = %d " % exon_id
    else:
        query = ("select gene_id from gene2exon where "
                 + "exon_id = %s and is_known = %s " % (exon_id, is_known))

    hits = search_db(cursor, query)
    if hits and "ERROR" not in hits[0]:
        return hits[0][0]

    search_db(cursor, query, verbose=True)
    return ""
예제 #37
0
def is_coding_exon(cursor, exon_id, is_known, db_name=None):
    """Tell whether gene2exon flags the given exon as coding.

    Returns True when the is_coding column of the matching row is
    greater than 0; False when it is not, when no row matches, or when
    the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select is_coding from gene2exon where exon_id = %d and is_known = %d" % (exon_id, is_known)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0] > 0
    return False
예제 #38
0
def exon_seq_id2exon_id(cursor, exon_seq_id, db_name=None):
    """Return the (exon_id, is_known) row stored for an exon_seq_id.

    The first matching exon_seq row is returned as delivered by
    search_db.  Returns "" when no row matches or the switch to db_name
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return ""

    query = "select exon_id, is_known from exon_seq where exon_seq_id = %d  " % int(exon_seq_id)
    hits = search_db(cursor, query)
    return hits[0] if hits else ""
예제 #39
0
def is_coding_exon(cursor, exon_id, is_known, db_name=None):
    """Tell whether gene2exon flags the given exon as coding.

    Returns True when the is_coding column of the matching row is
    greater than 0; False when it is not, when no row matches, or when
    the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select is_coding from gene2exon where exon_id = %d and is_known = %d" % (exon_id, is_known)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0] > 0
    return False
예제 #40
0
def get_description(cursor, gene_id, db_name=None):
    """Return the description column of a gene.

    Returns the stored description, "" when the gene has no row, and
    False when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select description from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if not hits:
        return ""
    return hits[0][0]
예제 #41
0
def get_description(cursor, gene_id, db_name=None):
    """Return the description column of a gene.

    Returns the stored description, "" when the gene has no row, and
    False when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select description from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if not hits:
        return ""
    return hits[0][0]
예제 #42
0
def exon_seq_id2exon_id(cursor, exon_seq_id, db_name=None):
    """Return the (exon_id, is_known) row stored for an exon_seq_id.

    The first matching exon_seq row is returned as delivered by
    search_db.  Returns "" when no row matches or the switch to db_name
    fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return ""

    query = "select exon_id, is_known from exon_seq where exon_seq_id = %d  " % int(exon_seq_id)
    hits = search_db(cursor, query)
    return hits[0] if hits else ""
예제 #43
0
def get_sw_seq_id(cursor, exon_id, db_name=None):
    """Return the exon_seq_id recorded for an exon in sw_exon.

    Returns the id as an int, or -1 when the switch to db_name fails,
    no row matches, or the stored value is empty/zero.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return -1

    query = "select exon_seq_id " + " from sw_exon where exon_id = %d" % exon_id
    hits = search_db(cursor, query)
    if hits and hits[0][0]:
        return int(hits[0][0])
    return -1
예제 #44
0
def get_sw_seq_id(cursor, exon_id, db_name=None):
    """Return the exon_seq_id recorded for an exon in sw_exon.

    Returns the id as an int, or -1 when the switch to db_name fails,
    no row matches, or the stored value is empty/zero.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return -1

    query = "select exon_seq_id " + " from sw_exon where exon_id = %d" % exon_id
    hits = search_db(cursor, query)
    if hits and hits[0][0]:
        return int(hits[0][0])
    return -1
예제 #45
0
def get_status(cursor, exon_id, db_name=None):
    """Return the status column of a row in the gene table.

    NOTE(review): despite its name, `exon_id` is matched against
    gene.gene_id in the query -- confirm callers pass a gene id.

    Returns the stored status, or False when no row matches or the
    switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select status from gene where gene_id = %d " % int(exon_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    return False
예제 #46
0
def get_status(cursor, exon_id, db_name=None):
    """Return the status column of a row in the gene table.

    NOTE(review): despite its name, `exon_id` is matched against
    gene.gene_id in the query -- confirm callers pass a gene id.

    Returns the stored status, or False when no row matches or the
    switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select status from gene where gene_id = %d " % int(exon_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    return False
예제 #47
0
def get_gene_biotype(cursor, gene_id, db_name=None):
    """Return the biotype column of a gene.

    Returns the stored biotype, "" when the gene has no row, and False
    when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select biotype from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    return ""
예제 #48
0
def get_gene_biotype(cursor, gene_id, db_name=None):
    """Return the biotype column of a gene.

    Returns the stored biotype, "" when the gene has no row, and False
    when the switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select biotype from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0]
    return ""
예제 #49
0
def is_coding(cursor, gene_id, db_name=None):
    """Tell whether the gene table flags a gene as coding.

    Returns True when the is_coding column of the gene's row is greater
    than 0; False when it is not, when the gene has no row, or when the
    switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select is_coding from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0] > 0
    return False
예제 #50
0
def trivial2scientific(cursor, trivial):
    """Translate a trivial species name into a scientific one.

    The tax_id registered for the trivial name is looked up in the NCBI
    names table and handed to taxid2name().  The result is lower-cased,
    spaces become underscores and a "_familiaris" suffix is dropped.

    :param cursor: database cursor; switched to the NCBI taxonomy database
    :param trivial: trivial species name
    :return: the normalized scientific name, or "" when the name is
             unknown, the query reports an error, or the stored tax_id
             cannot be converted to an int
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    # NOTE(review): trivial is interpolated into the SQL unescaped
    qry = "select tax_id from names where "
    qry += "name_txt = '%s' and " % trivial
    qry += "name_class = 'trivial'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # repeat verbosely so the error gets reported
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        tax_id = int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""

    sciname = taxid2name(cursor, tax_id).lower().replace(" ", "_")
    # canis_lupus_familiaris - don't know what to do with it
    return sciname.replace("_familiaris", "")
예제 #51
0
def trivial2scientific(cursor, trivial):
    """Translate a trivial species name into a scientific one.

    The tax_id registered for the trivial name is looked up in the NCBI
    names table and handed to taxid2name().  The result is lower-cased,
    spaces become underscores and a "_familiaris" suffix is dropped.

    :param cursor: database cursor; switched to the NCBI taxonomy database
    :param trivial: trivial species name
    :return: the normalized scientific name, or "" when the name is
             unknown, the query reports an error, or the stored tax_id
             cannot be converted to an int
    """
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    # NOTE(review): trivial is interpolated into the SQL unescaped
    qry = "select tax_id from names where "
    qry += "name_txt = '%s' and " % trivial
    qry += "name_class = 'trivial'"
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # repeat verbosely so the error gets reported
        search_db(cursor, qry, verbose=True)
        return ""

    try:
        tax_id = int(rows[0][0])
    except (TypeError, ValueError, IndexError):
        # only conversion problems are tolerated; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors
        return ""

    sciname = taxid2name(cursor, tax_id).lower().replace(" ", "_")
    # canis_lupus_familiaris - don't know what to do with it
    return sciname.replace("_familiaris", "")
예제 #52
0
def is_coding(cursor, gene_id, db_name=None):
    """Tell whether the gene table flags a gene as coding.

    Returns True when the is_coding column of the gene's row is greater
    than 0; False when it is not, when the gene has no row, or when the
    switch to db_name fails.
    """
    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "select is_coding from gene where gene_id = %d " % int(gene_id)
    hits = search_db(cursor, query)
    if hits:
        return hits[0][0] > 0
    return False
예제 #53
0
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Collect the genes of *species* that are homologous to a gene member.

    cursor          -- database cursor; it is switched between databases here
    ensembl_db_name -- indexed by species to get the name of the database in
                       which stable ids are turned into gene ids
    ortho_type      -- compared with homology.description; one of
                       'ortholog_one2one', 'ortholog_one2many',
                       'ortholog_many2many', 'possible_ortholog',
                       'apparent_ortholog_one2one'
    gene_member_id  -- integer gene_member_id of the query gene
    species         -- species name, passed to species2genome_db_id()

    Returns a list holding the result of stable2gene() for every stable id
    found; the list is empty when there are no homologues.
    """
    # find genome db_id
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database - should be the responsibility of each function
    switch_to_db(cursor, get_compara_name(cursor))

    qry  = "select homology.homology_id from homology_member, homology "
    qry += " where homology_member.gene_member_id =%d " % gene_member_id
    qry += " and homology.homology_id = homology_member.homology_id "
    qry += " and  homology.description = '%s' "    % ortho_type
    rows = search_db(cursor, qry)

    if not rows:
        return []  # no orthologs here

    # for each homology id find the other member id
    ortho_stable_ids = []
    for row in rows:
        homology_id = row[0]
        switch_to_db(cursor, get_compara_name(cursor))
        qry  = "select gene_member_id from homology_member "
        qry += " where homology_id = %d"  % int(homology_id)
        qry += " and not  gene_member_id = %d" % gene_member_id

        rows2 = search_db(cursor, qry, verbose=False)
        if not rows2:
            continue

        for row2 in rows2:
            ortho_id = row2[0]
            # keep the member only if it belongs to the requested genome
            qry  = "select  stable_id  from gene_member  "
            qry += " where gene_member_id = %d "  % ortho_id
            qry += " and genome_db_id = %d " % genome_db_id
            rows3 = search_db(cursor, qry, verbose=False)
            if not rows3:
                continue
            ortho_stable_ids.append(rows3[0][0])

    if not ortho_stable_ids:
        return []

    switch_to_db(cursor, ensembl_db_name[species])
    # build the list right away: a lazy map() would postpone the stable2gene
    # lookups until the caller iterates, when the cursor may point elsewhere
    return [stable2gene(cursor, stable_id) for stable_id in ortho_stable_ids]
예제 #54
0
def get_orthologues_from_species(cursor, ensembl_db_name, ortho_type, gene_member_id, species):
    """Return gene ids in *species* homologous to the given gene member.

    Homologies of type *ortho_type* (matched against homology.description;
    one of 'ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many',
    'possible_ortholog', 'apparent_ortholog_one2one') that involve
    *gene_member_id* are looked up in the compara database.  For each of
    them the other members are kept if they belong to the genome of
    *species*, and their stable ids are finally translated with
    stable2gene() in the database named by ensembl_db_name[species].

    The result is always a list; it is empty when nothing was found.
    """
    # find genome db_id
    genome_db_id = species2genome_db_id(cursor, species)

    # make the cursor point to compara database - should be the responsibility of each function
    switch_to_db(cursor, get_compara_name(cursor))

    qry = "select homology.homology_id from homology_member, homology "
    qry += " where homology_member.gene_member_id =%d " % gene_member_id
    qry += " and homology.homology_id = homology_member.homology_id "
    qry += " and  homology.description = '%s' " % ortho_type
    rows = search_db(cursor, qry)

    if not rows:
        return []  # no orthologs here

    orthos = []
    # for each homology id find the other member id
    for row in rows:
        homology_id = row[0]
        switch_to_db(cursor, get_compara_name(cursor))
        qry = "select gene_member_id from homology_member "
        qry += " where homology_id = %d" % int(homology_id)
        qry += " and not  gene_member_id = %d" % gene_member_id

        rows2 = search_db(cursor, qry, verbose=False)
        if not rows2:
            continue
        for row2 in rows2:
            ortho_id = row2[0]
            # the genome_db_id condition filters out members of other species
            qry = "select  stable_id  from gene_member  "
            qry += " where gene_member_id = %d " % ortho_id
            qry += " and genome_db_id = %d " % genome_db_id
            rows3 = search_db(cursor, qry, verbose=False)
            if not rows3:
                continue
            ortho_stable = rows3[0][0]
            orthos.append(ortho_stable)

    if orthos:
        switch_to_db(cursor, ensembl_db_name[species])
        # a list comprehension runs the lookups now, while the cursor still
        # points at the species database; map() would be lazy on Python 3
        orthos = [stable2gene(cursor, stable_id) for stable_id in orthos]
    return orthos
예제 #55
0
def get_logic_name(cursor, analysis_id, db_name = None):
    """Look up the logic_name of an analysis.

    A negative *analysis_id* gives '' without touching the database.  When
    *db_name* is given the cursor is switched to it first, and False is
    returned if that fails.  Otherwise the first column of the first row
    found in the analysis table is returned, or '' if there is no row.
    """
    if analysis_id < 0:
        return ''

    if db_name and not switch_to_db(cursor, db_name):
        return False

    query = "SELECT logic_name FROM analysis WHERE analysis_id = %d" % analysis_id
    found = search_db(cursor, query)
    return found[0][0] if found else ''
예제 #56
0
def gene2canon_transl(cursor, gene_id, db_name=None,):
    """Return the translation_id belonging to the canonical transcript of a gene.

    When *db_name* is given the cursor is switched to it first; False is
    returned if the switch fails.  If the query returns nothing it is run
    once more in verbose mode and "" is returned.
    """
    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    query = "".join([
        "select translation.translation_id  from translation, gene ",
        " where gene.canonical_transcript_id = translation.transcript_id ",
        " and gene.gene_id = %d " % gene_id,
    ])

    hits = search_db(cursor, query, verbose = False)
    if hits:
        return hits[0][0]

    # nothing found: repeat verbosely (presumably to show what went wrong)
    search_db(cursor, query, verbose = True)
    return ""
예제 #57
0
def get_logic_name(cursor, analysis_id, db_name=None):
    """Fetch logic_name from the analysis table for the given analysis_id.

    Returns "" right away for a negative *analysis_id*, and also when the
    query yields no rows.  If *db_name* is supplied and switching the cursor
    to it fails, False is returned instead.
    """
    if analysis_id < 0:
        return ""

    switch_failed = bool(db_name) and not switch_to_db(cursor, db_name)
    if switch_failed:
        return False

    result = search_db(
        cursor,
        "SELECT logic_name FROM analysis WHERE analysis_id = %d" % analysis_id,
    )
    if result:
        # first column of the first row
        return result[0][0]
    return ""
예제 #58
0
def gene2stable_canon_transl(cursor, gene_id, db_name=None):
    """Return the stable_id of the translation of a gene's canonical transcript.

    When *db_name* is given the cursor is switched to it first; False is
    returned if the switch fails.  If the query returns nothing it is run
    once more in verbose mode and "" is returned.
    """
    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    pieces = (
        "select translation.stable_id  from translation, gene ",
        " where gene.canonical_transcript_id = translation.transcript_id ",
        " and gene.gene_id = %d " % gene_id,
    )
    query = "".join(pieces)

    found = search_db(cursor, query, verbose=False)
    if found:
        return found[0][0]

    # empty result: ask again verbosely (presumably for diagnostics)
    search_db(cursor, query, verbose=True)
    return ""
예제 #59
0
def stable2gene (cursor, stable_id=None, db_name=None):
    """Translate a stable id into the integer gene_id from the gene table.

    Returns 0 when *stable_id* is empty or when no matching row is found
    (in the latter case the query is repeated in verbose mode first).
    Returns False when *db_name* is given but switching to it fails.
    """
    if not stable_id:
        return 0

    if db_name:
        if not switch_to_db(cursor, db_name):
            return False

    query = "select gene_id from gene where stable_id='%s'" % stable_id
    found = search_db(cursor, query, verbose = False)
    if found:
        return int(found[0][0])

    # no hit: repeat verbosely (presumably to show the failing query)
    search_db(cursor, query, verbose = True)
    return 0