def findGeneNameGenomePairsLike(release, substring, searchType=CONTAINS_TYPE, conn=None): ''' substring: search for gene names containing this string somehow. searchType: specify how the gene name should contain substring genomes names are like Homo_sapiens.aa returns: list of unique pairs of gene name and genome names for every gene name containing substring according to the searchType mapped to all genomes that contain a seq id that has that gene name. ''' sql = ' SELECT DISTINCT rs.gene_name, rg.acc' sql += ' FROM {} rs JOIN {} rg '.format(releaseTable(release, 'sequence'), releaseTable(release, 'genomes')) sql += ' WHERE rs.gene_name LIKE %s' if searchType == CONTAINS_TYPE: args = ['%' + substring + '%'] elif searchType == STARTS_WITH_TYPE: args = [substring + '%'] elif searchType == ENDS_WITH_TYPE: args = ['%' + substring] elif searchType == EQUALS_TYPE: args = [substring] else: raise Exception('Unrecognized searchType. searchType=%s'%searchType) sql += ' AND rs.genome_id = rg.id ' sql += ' ORDER BY rg.acc, rs.gene_name ' # results are a tuple of tuples which containing an gene name and genome and genome name. with connCM(conn=conn) as conn: return [tuple(row) for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
def numOrthologs(release, conn=None): num = 0 with connCM(conn=conn) as conn: sql = 'SELECT num_orthologs FROM {}'.format(releaseTable(release, 'results')) rows = dbutil.selectSQL(sql=sql, conn=conn) num = sum(row[0] for row in rows) return num
def exists(self, key): encodedKey = json.dumps(key) with self.manager as conn: sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s' results = dbutil.selectSQL(conn, sql, args=[encodedKey]) return bool( results) # True if there are any results, False otherwise.
def selectOne(conn, sql, args=None): ''' helper function that selects the first column of the first row assumption: there is at least one row with one column. expect an error otherwise. ''' with connCM(conn=conn) as conn: return dbutil.selectSQL(sql=sql, conn=conn, args=args)[0][0]
def getGenomesAndNames(release): ''' returns a list of pairs of genome (e.g. MYCGE) and name (e.g. Mycoplasma genitalium) ''' sql = 'SELECT acc, name FROM {}'.format(releaseTable(release, 'genomes')) logging.debug(sql) with connCM() as conn: return dbutil.selectSQL(sql=sql, conn=conn)
def get(self, key, default=None): encodedKey = json.dumps(key) with self.manager as conn: sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s' results = dbutil.selectSQL(conn, sql, args=[encodedKey]) if results: value = json.loads(results[0][0]) else: value = default return value
def get(self, key, default=None): encodedKey = json.dumps(key) with self.connect() as conn: sql = 'SELECT value FROM ' + self.table + ' WHERE name = %s' results = dbutil.selectSQL(conn, sql, args=[encodedKey]) if results: value = json.loads(results[0][0]) else: value = default return value
def getGenomesData(release): ''' returns a list of tuples, one for each genome, of acc, name, ncbi_taxon, taxon_category_code, taxon_category_name, and num_seqs. ''' sql = '''SELECT acc, name, ncbi_taxon, taxon_category_code, taxon_category_name, num_seqs FROM {}'''.format(releaseTable(release, 'genomes')) # logging.debug(sql) with connCM() as conn: return dbutil.selectSQL(sql=sql, conn=conn)
def get(self, key, default=None): sql = " SELECT value FROM " + self.table + " WHERE id=%s" with self.manager as conn: results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)]) if results: value = json.loads(results[0][0]) else: value = default # update access time sql = "UPDATE " + self.table + " SET access_time=NOW() WHERE id=%s" with dbutil.doTransaction(conn): dbutil.executeSQL(conn, sql, args=[self._cache_hash(key)]) return value
def getOrthologs(release, qdb, sdb, divergence='0.2', evalue='1e-20', conn=None): ''' divergence: ortholog must have this divergence. defaults to 0.2 evalue: ortholog must have this evalue. defaults to 1e-20. ''' with connCM(conn=conn) as conn: qdbId = getIdForGenome(release, qdb, conn) sdbId = getIdForGenome(release, sdb, conn) divId = getIdForDivergence(release, divergence, conn) evalueId = getIdForEvalue(release, evalue, conn) sql = 'SELECT rr.orthologs ' sql += ' FROM {} rr'.format(releaseTable(release, 'results')) sql += ' WHERE rr.query_db = %s AND rr.subject_db = %s AND rr.divergence = %s AND rr.evalue = %s ' args = [qdbId, sdbId, divId, evalueId] logging.debug('sql='+sql) logging.debug('args='+str(args)) rows = dbutil.selectSQL(sql=sql, args=args, conn=conn) return decodeOrthologs(rows[0][0])
def getSeqIdsForGeneName(release, geneName, genome=None, conn=None): ''' geneName: gene name or symbol, e.g. 'acsC' genome: genome id, e.g. 'Homo_sapiens.aa'. If None, all seq ids with the gene name are returned. conn: db connection to use. If None, one will be created. returns: list of sequence ids, i.e. a GI, ensembl id, etc., which have the given gene name. ''' with connCM(conn=conn) as conn: sql = 'SELECT DISTINCT rs.external_sequence_id ' sql += ' FROM {} rs '.format(releaseTable(release, 'sequence')) sql += ' WHERE rs.gene_name = %s ' params = [geneName] if genome: dbId = getIdForGenome(release, genome, conn) sql += ' AND rs.genome_id = %s ' params.append(dbId) # results are a tuple of tuples which containing an id. convert to a list of ids. return [row[0] for row in dbutil.selectSQL(sql=sql, args=params, conn=conn)]
def _readUnhandled(self): ''' Reads the next message from the queue. Returns: message_id, message. Use message_id to delete() the message when done or to changeTimeout() of the message if necessary. ''' with self.manager as conn: with dbutil.doTransaction(conn): # read first available message (pending or lock timeout) sql = 'SELECT id, message FROM message_queue WHERE queue = %s AND (NOT locked OR lock_time < CURRENT_TIMESTAMP)' sql += ' ORDER BY id ASC LIMIT 1 FOR UPDATE ' results = dbutil.selectSQL(conn, sql, args=[self.queue]) if results: id, message = results[0] # mark message unavailable for reading for timeout seconds. sql = 'UPDATE message_queue SET locked = TRUE, read_time = CURRENT_TIMESTAMP, lock_time = ADDTIME(CURRENT_TIMESTAMP, SEC_TO_TIME(timeout)) WHERE id = %s' dbutil.executeSQL(conn, sql, args=[id]) return id, message else: raise EmptyQueueError(str(self.queue))
def findGeneNamesLike(release, substring, searchType=CONTAINS_TYPE, conn=None): ''' substring: search for gene names containing this string somehow. searchType: specify how the gene name should contain substring returns: list of every gene name containing substring according to the searchType. ''' sql = ' SELECT DISTINCT rs.gene_name FROM {} rs '.format(releaseTable(release, 'sequence')) sql += ' WHERE rs.gene_name LIKE %s' if searchType == CONTAINS_TYPE: args = ['%' + substring + '%'] elif searchType == STARTS_WITH_TYPE: args = [substring + '%'] elif searchType == ENDS_WITH_TYPE: args = ['%' + substring] elif searchType == EQUALS_TYPE: args = [substring] else: raise Exception('Unrecognized searchType. searchType=%s'%searchType) # results are a tuple of tuples which containing an id. convert to a list of ids. with connCM(conn=conn) as conn: return [row[0] for row in dbutil.selectSQL(sql=sql, args=args, conn=conn)]
def getIdForName(name, table, conn=None): sql = 'SELECT id FROM {} WHERE name=%s'.format(table) with connCM(conn=conn) as conn: rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[name]) return rowset[0][0]
def getNameForId(id, table, conn=None): sql = 'SELECT name FROM {} WHERE id=%s'.format(table) with connCM(conn=conn) as conn: rowset = dbutil.selectSQL(sql=sql, conn=conn, args=[id]) return rowset[0][0]
def getGenomeToId(release): sql = 'select acc, id from {}'.format(releaseTable(release, 'genomes')) with connCM() as conn: return dict(dbutil.selectSQL(sql=sql, conn=conn))
def getEvalueToId(release): sql = 'select name, id from {}'.format(releaseTable(release, 'evalues')) with connCM() as conn: return dict(dbutil.selectSQL(sql=sql, conn=conn))
def getSequenceToId(release): sql = 'select external_sequence_id, id from {}'.format(releaseTable(release, 'sequence')) with connCM() as conn: return dict(dbutil.selectSQL(sql=sql, conn=conn))
def selectSequenceDataRows(): return dbutil.selectSQL(sql=sql, conn=conn)
def selectTermsRows(): return dbutil.selectSQL(sql=sql, conn=conn)
def exists(self, key): encodedKey = json.dumps(key) with self.connect() as conn: sql = 'SELECT id FROM ' + self.table + ' WHERE name = %s' results = dbutil.selectSQL(conn, sql, args=[encodedKey]) return bool(results) # True if there are any results, False otherwise.
def has_key(self, key): sql = " SELECT id FROM " + self.table + " WHERE id=%s" with self.manager as conn: results = dbutil.selectSQL(conn, sql, args=[self._cache_hash(key)]) foundKey = bool(results) return foundKey