def createRelease(release):
    '''
    Create the four release-scoped lookup tables (divergences, evalues,
    sequence, sequence_to_go_term) and, via createGenomes(), the genomes
    table for the given release.  Safe to re-run: all DDL uses IF NOT EXISTS.

    release: the id of the release whose tables should be created.
    returns: nothing.
    '''
    # releaseTable() derives the concrete table name from the release id and
    # a base name, so each release gets its own namespaced tables.
    # The long DDL strings are kept on single lines so the generated SQL is
    # exactly what gets printed/executed.
    sqls = ['''CREATE TABLE IF NOT EXISTS {} (id tinyint unsigned auto_increment primary key, name varchar(100) NOT NULL) ENGINE = InnoDB'''.format(releaseTable(release, 'divergences')),
            '''CREATE TABLE IF NOT EXISTS {} (id tinyint unsigned auto_increment primary key, name varchar(100) NOT NULL) ENGINE = InnoDB'''.format(releaseTable(release, 'evalues')),
            '''CREATE TABLE IF NOT EXISTS {} (id int unsigned auto_increment primary key, external_sequence_id varchar(100) NOT NULL, genome_id smallint(5) unsigned NOT NULL, gene_name varchar(100), gene_id int, KEY genome_index (genome_id), UNIQUE KEY sequence_index (external_sequence_id), UNIQUE KEY sequence_and_genome (external_sequence_id, genome_id) ) ENGINE = InnoDB'''.format(releaseTable(release, 'sequence')),
            '''CREATE TABLE IF NOT EXISTS {} (id int unsigned auto_increment primary key, sequence_id int unsigned NOT NULL, go_term_acc varchar(255) NOT NULL, go_term_name varchar(255) NOT NULL, go_term_type varchar(55) NOT NULL, KEY sequence_index (sequence_id), UNIQUE KEY sequence_and_acc_index (sequence_id, go_term_acc) ) ENGINE = InnoDB'''.format(releaseTable(release, 'sequence_to_go_term')),
            ]
    # genomes table is created by its own helper so it can also be made alone.
    createGenomes(release)
    with connCM() as conn:
        for sql in sqls:
            print sql
            dbutil.executeSQL(sql=sql, conn=conn)
def set(self, key, value):
    '''
    Store value under key in the cache table, JSON-encoding it first.
    Inserts a new row, or -- if the hashed key already exists -- updates
    the value and refreshes mod_time/access_time.
    '''
    sql = ("INSERT INTO " + self.table
           + " (id, value, create_time, mod_time, access_time) VALUES (%s, %s, NOW(), NOW(), NOW()) "
           + " ON DUPLICATE KEY UPDATE value=%s, mod_time=NOW(), access_time=NOW() ")
    serialized = json.dumps(value)
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            # value appears twice: once for INSERT, once for the UPDATE branch.
            dbutil.executeSQL(conn, sql, args=[self._cache_hash(key), serialized, serialized])
def drop(self):
    '''Drop the cache table from the database, if it exists.'''
    with self.manager as conn:
        dbutil.executeSQL(conn, "DROP TABLE IF EXISTS " + self.table)
def dropRelease(release): sqls = ['DROP TABLE IF EXISTS {}'.format(releaseTable(release, 'divergences')), 'DROP TABLE IF EXISTS {}'.format(releaseTable(release, 'evalues')), 'DROP TABLE IF EXISTS {}'.format(releaseTable(release, 'sequence')), 'DROP TABLE IF EXISTS {}'.format(releaseTable(release, 'sequence_to_go_term')), ] dropGenomes(release) with connCM() as conn: for sql in sqls: print sql dbutil.executeSQL(sql=sql, conn=conn)
def create(self):
    '''
    Create the name/value store table (if it does not already exist).
    returns: self, so the call can be chained.
    '''
    # NOTE(review): `name` already gets an index from its UNIQUE KEY, so the
    # explicit INDEX key_index (name) is redundant -- consider dropping it.
    sql = '''CREATE TABLE IF NOT EXISTS ''' + self.table + ''' ( id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255) NOT NULL UNIQUE KEY, value blob, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, INDEX key_index (name) ) ENGINE = InnoDB '''
    with self.connect() as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
    return self
def create(self):
    '''
    Create the name/value store table (if it does not already exist),
    using the connection from self.manager.
    returns: self, so the call can be chained.
    '''
    # NOTE(review): `name` already gets an index from its UNIQUE KEY, so the
    # explicit INDEX key_index (name) is redundant -- consider dropping it.
    sql = '''CREATE TABLE IF NOT EXISTS ''' + self.table + ''' ( id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255) NOT NULL UNIQUE KEY, value blob, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, INDEX key_index (name) ) ENGINE = InnoDB '''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
    return self
def createGenomes(release):
    '''
    Create the genomes table for the given release (if it does not already
    exist).  Each row describes one genome: its accession, display name,
    NCBI taxon info, and sequence count.  Accessions are unique.
    returns: nothing.
    '''
    sql = '''CREATE TABLE IF NOT EXISTS {} (id smallint unsigned auto_increment primary key, acc varchar(100) NOT NULL, name varchar(255) NOT NULL, ncbi_taxon varchar(20) NOT NULL, taxon_name varchar(255) NOT NULL, taxon_category_code varchar(10) NOT NULL, taxon_category_name varchar(255) NOT NULL, num_seqs int unsigned NOT NULL, UNIQUE KEY genome_acc_key (acc)) ENGINE = InnoDB'''.format(releaseTable(release, 'genomes'))
    with connCM() as conn:
        print sql
        dbutil.executeSQL(sql=sql, conn=conn)
def loadGenomes(release, genomesFile): ''' release: the id of the release being loaded genomesFile: each line contains a tab-separated id (integer) and external genome id/name (string). The ids should go from 1 to N (where N is the number of genomes.) Genomes should be unique. Why use LOAD DATA INFILE? Because it is very fast relative to insert. a discussion of insertion speed: http://dev.mysql.com/doc/refman/5.1/en/insert-speed.html ''' sql = 'LOAD DATA LOCAL INFILE %s INTO TABLE {}'.format(releaseTable(release, 'genomes')) args = [genomesFile] with connCM() as conn: print sql, args dbutil.executeSQL(sql=sql, conn=conn, args=args)
def deleteGenomeByName(release, genome, conn=None): ''' genome: name of genome, like 'Takifugu_rubripes.aa'. deletes all roundup results in mysql containing this genome. returns: nothing. ''' # logging.debug('deleteGenomeByName(): genome=%s'%genome) with connCM(conn=conn) as conn: dbId = getIdForGenome(release, genome, conn) if not dbId: return print 'dbId=%s'%dbId # remove from roundup_results sql = 'DELETE FROM {} WHERE query_db=%s OR subject_db=%s'.format(releaseTable(release, 'results')) print sql dbutil.executeSQL(sql=sql, conn=conn, args=[dbId, dbId]) # delete sequence to go term entries for the given genome sql = 'DELETE FROM rs2gt USING {} AS rs2gt INNER JOIN {} AS rs WHERE rs2gt.sequence_id = rs.id AND rs.genome_id=%s' sql = sql.format(releaseTable(release, 'sequence_to_go_term'), releaseTable(release, 'sequence')) print sql dbutil.executeSQL(sql=sql, conn=conn, args=[dbId]) # delete sequences from the genome. sql = 'DELETE FROM {} WHERE genome_id=%s'.format(releaseTable(release, 'sequence')) print sql dbutil.executeSQL(sql=sql, conn=conn, args=[dbId]) # remove genome sql = 'DELETE FROM {} WHERE id=%s'.format(releaseTable(release, 'genomes')) print sql dbutil.executeSQL(sql=sql, conn=conn, args=[dbId])
def create(self):
    '''
    create the cache table in the database.
    the table must be created before the cache functions (get, set, etc.) will work
    '''
    # id holds the hashed cache key (see _cache_hash); value is the
    # JSON-encoded payload; the three datetime columns track row lifecycle.
    sql = '''CREATE TABLE IF NOT EXISTS ''' + self.table + ''' ( `id` varchar(40) NOT NULL default '', `value` mediumtext, `create_time` datetime default NULL, `mod_time` datetime default NULL, `access_time` datetime default NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1'''
    with self.manager as conn:
        dbutil.executeSQL(conn, sql)
def _create(self):
    '''
    Create the message_queue table (if it does not already exist).
    Rows are messages: queue name, payload blob, lock bookkeeping columns,
    and a per-message timeout in seconds.
    '''
    # NOTE(review): in MySQL the first TIMESTAMP column without an explicit
    # default may get auto-initialize/auto-update semantics depending on
    # server version/settings -- confirm read_time/lock_time behave as intended.
    sql = '''CREATE TABLE IF NOT EXISTS message_queue ( id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, queue varchar(200) NOT NULL, message blob, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, read_time TIMESTAMP, lock_time TIMESTAMP, timeout INT NOT NULL, locked BOOLEAN NOT NULL DEFAULT FALSE, INDEX queue_index (queue) ) ENGINE = InnoDB '''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
def get(self, key, default=None):
    '''
    Look up key in the cache table.

    key: the cache key; hashed via self._cache_hash before lookup.
    default: value returned when the key is not present.
    returns: the JSON-decoded cached value, or default on a miss.

    Side effect: refreshes the row's access_time (a no-op UPDATE on a miss).
    '''
    # Hash once: avoids recomputing and guarantees the SELECT and the
    # access_time UPDATE target the same row.
    hashedKey = self._cache_hash(key)
    sql = " SELECT value FROM " + self.table + " WHERE id=%s"
    with self.manager as conn:
        results = dbutil.selectSQL(conn, sql, args=[hashedKey])
        if results:
            value = json.loads(results[0][0])
        else:
            value = default
        # update access time so recently-used entries can be distinguished
        sql = "UPDATE " + self.table + " SET access_time=NOW() WHERE id=%s"
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql, args=[hashedKey])
        return value
def createReleaseResults(release):
    '''
    Create the results table for the given release (if it does not already
    exist).  Each row holds the serialized orthologs for one
    (query_db, subject_db, divergence, evalue) parameter combination,
    which is enforced unique.
    returns: nothing.
    '''
    sql = '''CREATE TABLE IF NOT EXISTS {} (id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, query_db SMALLINT UNSIGNED NOT NULL, subject_db SMALLINT UNSIGNED NOT NULL, divergence TINYINT UNSIGNED NOT NULL, evalue TINYINT UNSIGNED NOT NULL, mod_time DATETIME DEFAULT NULL, orthologs LONGBLOB, num_orthologs INT UNSIGNED NOT NULL, KEY query_db_index (query_db), KEY subject_db_index (subject_db), UNIQUE KEY params_key (query_db, subject_db, divergence, evalue) ) ENGINE = InnoDB'''.format(releaseTable(release, 'results'))
    with connCM() as conn:
        print sql
        dbutil.executeSQL(sql=sql, conn=conn)
def _readUnhandled(self):
    '''
    Reads the next message from the queue.
    Returns: message_id, message.  Use message_id to delete() the message
    when done or to changeTimeout() of the message if necessary.
    Raises: EmptyQueueError if no message is available.
    '''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            # read first available message (pending or lock timeout);
            # SELECT ... FOR UPDATE keeps concurrent readers from grabbing it.
            sql = 'SELECT id, message FROM message_queue WHERE queue = %s AND (NOT locked OR lock_time < CURRENT_TIMESTAMP)'
            sql += ' ORDER BY id ASC LIMIT 1 FOR UPDATE '
            results = dbutil.selectSQL(conn, sql, args=[self.queue])
            # guard clause: nothing readable in this queue.
            if not results:
                raise EmptyQueueError(str(self.queue))
            # renamed from `id`, which shadowed the builtin.
            messageId, message = results[0]
            # mark message unavailable for reading for timeout seconds.
            sql = 'UPDATE message_queue SET locked = TRUE, read_time = CURRENT_TIMESTAMP, lock_time = ADDTIME(CURRENT_TIMESTAMP, SEC_TO_TIME(timeout)) WHERE id = %s'
            dbutil.executeSQL(conn, sql, args=[messageId])
            return messageId, message
def loadRelease(release, genomesFile, divergencesFile, evaluesFile, seqsFile, seqToGoTermsFile): ''' release: the id of the release being loaded genomesFile: each line contains a tab-separated id (integer) and external genome id/name (string). The ids should go from 1 to N (where N is the number of genomes.) Genomes should be unique. Why use LOAD DATA INFILE? Because it is very fast relative to insert. a discussion of insertion speed: http://dev.mysql.com/doc/refman/5.1/en/insert-speed.html ''' loadGenomes(release, genomesFile) sqls = ['LOAD DATA LOCAL INFILE %s INTO TABLE {}'.format(releaseTable(release, 'divergences')), 'LOAD DATA LOCAL INFILE %s INTO TABLE {}'.format(releaseTable(release, 'evalues')), 'LOAD DATA LOCAL INFILE %s INTO TABLE {}'.format(releaseTable(release, 'sequence')), 'LOAD DATA LOCAL INFILE %s INTO TABLE {}'.format(releaseTable(release, 'sequence_to_go_term')), ] argsList = [[divergencesFile], [evaluesFile], [seqsFile], [seqToGoTermsFile]] with connCM() as conn: for sql, args in zip(sqls, argsList): print sql, args dbutil.executeSQL(sql=sql, conn=conn, args=args)
def drop(self):
    '''Delete the backing table if present.  returns: self, for chaining.'''
    sql = 'DROP TABLE IF EXISTS ' + self.table
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
    return self
def remove(self, key):
    '''
    Delete the row stored under key (matched against its JSON encoding).
    returns: whatever dbutil.executeSQL returns.
    '''
    name = json.dumps(key)
    with self.connect() as conn:
        with dbutil.doTransaction(conn):
            return dbutil.executeSQL(conn, 'DELETE FROM ' + self.table + ' WHERE name = %s',
                                     args=[name])
def drop(self):
    '''Drop the table backing this store if it exists.  returns: self.'''
    sql = 'DROP TABLE IF EXISTS ' + self.table
    with self.connect() as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
    return self
def _drop(self):
    '''Remove the message_queue table entirely (all queues, all messages).'''
    sql = 'drop table if exists message_queue'
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, sql)
def changeTimeout(self, id, timeout):
    '''changes read lock to <timeout> seconds from now.'''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            sql = 'UPDATE message_queue SET lock_time = ADDTIME(CURRENT_TIMESTAMP, SEC_TO_TIME(%s)) WHERE id = %s'
            return dbutil.executeSQL(conn, sql, args=[timeout, id])
def delete(self, id):
    '''
    Remove the message with the given id from the queue, typically after
    it has been handled.  returns: whatever dbutil.executeSQL returns.
    '''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            return dbutil.executeSQL(conn, 'DELETE FROM message_queue WHERE id = %s ',
                                     args=[id])
def dropGenomes(release): sql = 'DROP TABLE IF EXISTS {}'.format(releaseTable(release, 'genomes')) with connCM() as conn: print sql dbutil.executeSQL(sql=sql, conn=conn)
def remove(self, key):
    '''Delete the cache entry for key (looked up by hashed id), if any.'''
    hashedKey = self._cache_hash(key)
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            dbutil.executeSQL(conn, "DELETE FROM " + self.table + " WHERE id=%s",
                              args=[hashedKey])
def remove(self, key):
    '''
    Delete the row whose name matches the JSON-encoded key.
    returns: whatever dbutil.executeSQL returns.
    '''
    with self.manager as conn:
        with dbutil.doTransaction(conn):
            return dbutil.executeSQL(conn, 'DELETE FROM ' + self.table + ' WHERE name = %s',
                                     args=[json.dumps(key)])