Exemple #1
0
def findGeneNameGenomePairsLike(release, substring, searchType=CONTAINS_TYPE, conn=None):
    '''
    substring: search for gene names containing this string somehow.
    searchType: specify how the gene name should contain substring
    genomes names are like Homo_sapiens.aa
    returns: list of unique pairs of gene name and genome names for every gene name containing substring according to the searchType
    mapped to all genomes that contain a seq id that has that gene name.
    '''
    sql = ' SELECT DISTINCT rs.gene_name, rg.acc'
    sql += ' FROM {} rs JOIN {} rg '.format(releaseTable(release, 'sequence'), releaseTable(release, 'genomes'))
    sql += ' WHERE rs.gene_name LIKE %s'
    if searchType == CONTAINS_TYPE:
        args = ['%' + substring + '%']
    elif searchType == STARTS_WITH_TYPE:
        args = [substring + '%']
    elif searchType == ENDS_WITH_TYPE:
        args = ['%' + substring]
    elif searchType == EQUALS_TYPE:
        args = [substring]
    else:
        raise Exception('Unrecognized searchType.  searchType=%s'%searchType)
    sql += ' AND rs.genome_id = rg.id '
    sql += ' ORDER BY rg.acc, rs.gene_name '
    
    # results are a tuple of tuples which containing an gene name and genome and genome name.
    with connCM(conn=conn) as conn:
        return [tuple(row) for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
Exemple #2
0
def numOrthologs(release, conn=None):
    num = 0
    with connCM(conn=conn) as conn:
        sql = 'SELECT num_orthologs FROM {}'.format(releaseTable(release, 'results'))
        rows = dbutil.selectSQL(sql=sql, conn=conn)
        num = sum(row[0] for row in rows)
    return num
Exemple #3
0
 def exists(self, key):
     encodedKey = json.dumps(key)
     with self.manager as conn:
         sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         return bool(
             results)  # True if there are any results, False otherwise.
Exemple #4
0
def selectOne(conn, sql, args=None):
    '''
    helper function that selects the first column of the first row
    assumption: there is at least one row with one column.  expect an error otherwise.
    '''
    with connCM(conn=conn) as conn:
        return dbutil.selectSQL(sql=sql, conn=conn, args=args)[0][0]
Exemple #5
0
def getGenomesAndNames(release):
    '''
    returns a list of pairs of genome (e.g. MYCGE) and name (e.g. Mycoplasma genitalium)
    '''
    sql = 'SELECT acc, name FROM {}'.format(releaseTable(release, 'genomes'))
    logging.debug(sql)
    with connCM() as conn:
        return dbutil.selectSQL(sql=sql, conn=conn)
Exemple #6
0
 def get(self, key, default=None):
     encodedKey = json.dumps(key)
     with self.manager as conn:
         sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         if results:
             value = json.loads(results[0][0])
         else:
             value = default
         return value
Exemple #7
0
 def get(self, key, default=None):
     encodedKey = json.dumps(key)
     with self.connect() as conn:
         sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         if results:
             value = json.loads(results[0][0])
         else:
             value = default
         return value
Exemple #8
0
def getGenomesData(release):
    '''
    returns a list of tuples, one for each genome, of acc, name, ncbi_taxon, taxon_category_code,
    taxon_category_name, and num_seqs.
    '''
    sql = '''SELECT acc, name, ncbi_taxon, taxon_category_code, taxon_category_name, num_seqs
    FROM {}'''.format(releaseTable(release, 'genomes'))
    # logging.debug(sql)
    with connCM() as conn:
        return dbutil.selectSQL(sql=sql, conn=conn)
Exemple #9
0
    def get(self, key, default=None):
        sql = " SELECT value FROM " + self.table + " WHERE id=%s"
        with self.manager as conn:
            results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)])
            if results:
                value = json.loads(results[0][0])
            else:
                value = default

            # update access time
            sql = "UPDATE " + self.table + " SET access_time=NOW() WHERE id=%s"
            with dbutil.doTransaction(conn):
                dbutil.executeSQL(conn, sql, args=[self._cache_hash(key)])

            return value
Exemple #10
0
def getOrthologs(release, qdb, sdb, divergence='0.2', evalue='1e-20', conn=None):
    '''
    divergence: ortholog must have this divergence.  defaults to 0.2
    evalue: ortholog must have this evalue.  defaults to 1e-20.
    '''
    with connCM(conn=conn) as conn:
        qdbId = getIdForGenome(release, qdb, conn)
        sdbId = getIdForGenome(release, sdb, conn)
        divId = getIdForDivergence(release, divergence, conn)
        evalueId = getIdForEvalue(release, evalue, conn)
        sql = 'SELECT rr.orthologs '
        sql += ' FROM {} rr'.format(releaseTable(release, 'results'))
        sql += ' WHERE rr.query_db = %s AND rr.subject_db = %s AND rr.divergence = %s AND rr.evalue = %s '
        args = [qdbId, sdbId, divId, evalueId]
        logging.debug('sql='+sql)
        logging.debug('args='+str(args))
        rows = dbutil.selectSQL(sql=sql, args=args, conn=conn)
        return decodeOrthologs(rows[0][0])
Exemple #11
0
def getSeqIdsForGeneName(release, geneName, genome=None, conn=None):
    '''
    geneName: gene name or symbol, e.g. 'acsC'
    genome: genome id, e.g. 'Homo_sapiens.aa'.  If None, all seq ids with the gene name are returned.
    conn: db connection to use.  If None, one will be created.
    returns: list of sequence ids, i.e. a GI, ensembl id, etc., which have the given gene name.
    '''
    with connCM(conn=conn) as conn:
        sql = 'SELECT DISTINCT rs.external_sequence_id '
        sql += ' FROM {} rs '.format(releaseTable(release, 'sequence'))
        sql += ' WHERE rs.gene_name = %s '
        params = [geneName]
        if genome:
            dbId = getIdForGenome(release, genome, conn)
            sql += ' AND rs.genome_id = %s '
            params.append(dbId)

        # results are a tuple of tuples which containing an id.  convert to a list of ids.
        return [row[0] for row in dbutil.selectSQL(sql=sql, args=params, conn=conn)]
 def _readUnhandled(self):
     '''
     Reads the next message from the queue.
     Returns: message_id, message.
     Use message_id to delete() the message when done or to changeTimeout() of the message if necessary.
     '''
     with self.manager as conn:
         with dbutil.doTransaction(conn):
             # read first available message (pending or lock timeout)
             sql = 'SELECT id, message FROM message_queue WHERE queue = %s AND (NOT locked OR  lock_time < CURRENT_TIMESTAMP)'
             sql += ' ORDER BY id ASC LIMIT 1 FOR UPDATE '
             results = dbutil.selectSQL(conn, sql, args=[self.queue])
             if results:
                 id, message = results[0]
                 # mark message unavailable for reading for timeout seconds.
                 sql = 'UPDATE message_queue SET locked = TRUE, read_time = CURRENT_TIMESTAMP, lock_time = ADDTIME(CURRENT_TIMESTAMP, SEC_TO_TIME(timeout)) WHERE id = %s'
                 dbutil.executeSQL(conn, sql, args=[id])
                 return id, message
             else:
                 raise EmptyQueueError(str(self.queue))
Exemple #13
0
def findGeneNamesLike(release, substring, searchType=CONTAINS_TYPE, conn=None):
    '''
    substring: search for gene names containing this string somehow.
    searchType: specify how the gene name should contain substring
    returns: list of every gene name containing substring according to the searchType.
    '''
    sql = ' SELECT DISTINCT rs.gene_name FROM {} rs '.format(releaseTable(release, 'sequence'))
    sql += ' WHERE rs.gene_name LIKE %s'
    if searchType == CONTAINS_TYPE:
        args = ['%' + substring + '%']
    elif searchType == STARTS_WITH_TYPE:
        args = [substring + '%']
    elif searchType == ENDS_WITH_TYPE:
        args = ['%' + substring]
    elif searchType == EQUALS_TYPE:
        args = [substring]
    else:
        raise Exception('Unrecognized searchType.  searchType=%s'%searchType)

    # results are a tuple of tuples which containing an id.  convert to a list of ids.
    with connCM(conn=conn) as conn:
        return [row[0] for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
Exemple #14
0
def getIdForName(name, table, conn=None):
    sql = 'SELECT id FROM {} WHERE name=%s'.format(table)
    with connCM(conn=conn) as conn:
        rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[name])
        return rowset[0][0]
Exemple #15
0
def getNameForId(id, table, conn=None):
    sql = 'SELECT name FROM {} WHERE id=%s'.format(table)
    with connCM(conn=conn) as conn:
        rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[id])
        return rowset[0][0]
Exemple #16
0
def getGenomeToId(release):
    sql = 'select acc, id from {}'.format(releaseTable(release, 'genomes'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
Exemple #17
0
def getEvalueToId(release):
    sql = 'select name, id from {}'.format(releaseTable(release, 'evalues'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
Exemple #18
0
def getSequenceToId(release):
    sql = 'select external_sequence_id, id from {}'.format(releaseTable(release, 'sequence'))
    with connCM() as conn:
        return dict(dbutil.selectSQL(sql=sql, conn=conn))
Exemple #19
0
 def selectSequenceDataRows():
     return dbutil.selectSQL(sql=sql, conn=conn)
Exemple #20
0
 def selectTermsRows():
     return dbutil.selectSQL(sql=sql, conn=conn)
Exemple #21
0
 def exists(self, key):
     encodedKey = json.dumps(key)
     with self.connect() as conn:
         sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s'
         results = dbutil.selectSQL(conn, sql, args=[encodedKey])
         return bool(results) # True if there are any results, False otherwise.
Exemple #22
0
 def has_key(self, key):
     sql = " SELECT id FROM " + self.table + " WHERE id=%s"
     with self.manager as conn:
         results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)])
         foundKey = bool(results)
         return foundKey