class MySQLSchemaInstaller(AbstractSchemaInstaller):
    implements(ISchemaInstaller)

    database_type = 'mysql'

    def get_database_name(self, cursor):
        cursor.execute("SELECT DATABASE()")
        for (name,) in cursor:
            return name

    def list_tables(self, cursor):
        cursor.execute("SHOW TABLES")
        return [name for (name,) in cursor.fetchall()]

    def list_sequences(self, cursor):
        return []

    def check_compatibility(self, cursor, tables):
        super(MySQLSchemaInstaller, self).check_compatibility(cursor, tables)
        stmt = "SHOW TABLE STATUS LIKE 'object_state'"
        cursor.execute(stmt)
        for row in cursor:
            for col_index, col in enumerate(cursor.description):
                if col[0].lower() == 'engine':
                    engine = row[col_index]
                    if engine.lower() != 'innodb':
                        raise StorageError(
                            "The object_state table must use the InnoDB "
                            "engine, but it is using the %s engine." % engine)
class MySQLLocker(Locker):
    implements(ILocker)

    @metricmethod
    def hold_commit_lock(self, cursor, ensure_current=False, nowait=False):
        timeout = not nowait and self.commit_lock_timeout or 0
        stmt = "SELECT GET_LOCK(CONCAT(DATABASE(), '.commit'), %s)"
        cursor.execute(stmt, (timeout,))
        locked = cursor.fetchone()[0]
        if nowait and locked in (0, 1):
            return bool(locked)
        if not locked:
            raise StorageError("Unable to acquire commit lock")

    def release_commit_lock(self, cursor):
        stmt = "SELECT RELEASE_LOCK(CONCAT(DATABASE(), '.commit'))"
        cursor.execute(stmt)

    def hold_pack_lock(self, cursor):
        """Try to acquire the pack lock.

        Raise an exception if packing or undo is already in progress.
        """
        stmt = "SELECT GET_LOCK(CONCAT(DATABASE(), '.pack'), 0)"
        cursor.execute(stmt)
        res = cursor.fetchone()[0]
        if not res:
            raise StorageError('A pack or undo operation is in progress')

    def release_pack_lock(self, cursor):
        """Release the pack lock."""
        stmt = "SELECT RELEASE_LOCK(CONCAT(DATABASE(), '.pack'))"
        cursor.execute(stmt)
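# Aside (illustrative sketch, not part of the original module): the MySQL
# GET_LOCK() semantics that hold_commit_lock() above relies on.  GET_LOCK
# returns 1 when the lock is acquired, 0 on timeout, and NULL on error,
# which is why the nowait path checks `locked in (0, 1)` before converting
# to bool.  The cursor is assumed to be any DB-API cursor on a MySQL
# connection; the helper name is hypothetical.
def _try_named_lock(cursor, name, timeout=0):
    # Returns True if acquired, False if another session holds the lock.
    cursor.execute("SELECT GET_LOCK(%s, %s)", (name, timeout))
    result = cursor.fetchone()[0]
    if result is None:
        raise StorageError("GET_LOCK returned an error")
    return bool(result)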
class HistoryFreeDatabaseIterator(DatabaseIterator):
    implements(IDatabaseIterator)

    def iter_transactions(self, cursor):
        """Iterate over the transaction log, newest first.

        Skips packed transactions.
        Yields (tid, username, description, extension) for each transaction.
        """
        return []

    def iter_transactions_range(self, cursor, start=None, stop=None):
        """Iterate over the transactions in the given range, oldest first.

        Includes packed transactions.
        Yields (tid, username, description, extension, packed)
        for each transaction.
        """
        stmt = """
        SELECT DISTINCT tid
        FROM object_state
        WHERE tid > 0
        """
        if start is not None:
            stmt += " AND tid >= %(min_tid)s"
        if stop is not None:
            stmt += " AND tid <= %(max_tid)s"
        stmt += " ORDER BY tid"
        self.runner.run_script_stmt(cursor, stmt,
            {'min_tid': start, 'max_tid': stop})
        return ((tid, '', '', '', True) for (tid,) in cursor)

    def iter_object_history(self, cursor, oid):
        """Iterate over an object's history.

        Raises KeyError if the object does not exist.
        Yields (tid, username, description, extension, pickle_size)
        for each modification.
        """
        stmt = """
        SELECT tid, state_size
        FROM object_state
        WHERE zoid = %(oid)s
        """
        self.runner.run_script_stmt(cursor, stmt, {'oid': oid})
        return ((tid, '', '', '', size) for (tid, size) in cursor)
class PostgreSQLTransactionControl(TransactionControl):
    implements(ITransactionControl)

    def __init__(self, keep_history):
        self.keep_history = keep_history

    def get_tid(self, cursor):
        """Returns the most recent tid."""
        if self.keep_history:
            stmt = """
            SELECT tid
            FROM transaction
            ORDER BY tid DESC
            LIMIT 1
            """
            cursor.execute(stmt)
        else:
            stmt = """
            SELECT tid
            FROM object_state
            ORDER BY tid DESC
            LIMIT 1
            """
            cursor.execute(stmt)
        if not cursor.rowcount:
            # nothing has been stored yet
            return 0
        assert cursor.rowcount == 1
        return cursor.fetchone()[0]

    def add_transaction(self, cursor, tid, username, description, extension,
            packed=False):
        """Add a transaction."""
        if self.keep_history:
            stmt = """
            INSERT INTO transaction
                (tid, packed, username, description, extension)
            VALUES (%s, %s,
                decode(%s, 'base64'), decode(%s, 'base64'),
                decode(%s, 'base64'))
            """
            cursor.execute(stmt, (tid, packed,
                encode_bytes_param(username, True),
                encode_bytes_param(description, True),
                encode_bytes_param(extension, True)))
class MySQLOIDAllocator(object):
    implements(IOIDAllocator)

    def set_min_oid(self, cursor, oid):
        """Ensure the next OID is at least the given OID."""
        n = (oid + 15) // 16
        cursor.execute("REPLACE INTO new_oid VALUES(%s)", (n,))

    @metricmethod
    def new_oids(self, cursor):
        """Return a sequence of new, unused OIDs."""
        stmt = "INSERT INTO new_oid VALUES ()"
        cursor.execute(stmt)
        n = cursor.connection.insert_id()
        if n % 100 == 0:
            # Clean out previously generated OIDs.
            stmt = "DELETE FROM new_oid WHERE zoid < %s"
            cursor.execute(stmt, (n,))
        return range(n * 16 - 15, n * 16 + 1)
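# Aside (illustrative only): the OID batching arithmetic shared by the
# allocators in this module.  Each counter value n reserves the block of
# 16 OIDs (n*16 - 15) .. (n*16), so set_min_oid() maps a minimum OID back
# to the smallest counter value whose block covers it.  This is plain
# arithmetic, independent of any particular database.
def _oid_block(n):
    return range(n * 16 - 15, n * 16 + 1)

assert list(_oid_block(1)) == list(range(1, 17))   # counter 1 -> OIDs 1..16
assert (20 + 15) // 16 == 2                        # set_min_oid(20) -> counter 2
assert 20 in _oid_block(2)                         # and its block covers OID 20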
class PostgreSQLOIDAllocator(object):
    implements(IOIDAllocator)

    def set_min_oid(self, cursor, oid):
        """Ensure the next OID is at least the given OID."""
        n = (oid + 15) // 16
        cursor.execute("""
        SELECT CASE WHEN %s > nextval('zoid_seq')
            THEN setval('zoid_seq', %s)
            ELSE 0
            END
        """, (n, n))

    @metricmethod
    def new_oids(self, cursor):
        """Return a sequence of new, unused OIDs."""
        stmt = "SELECT NEXTVAL('zoid_seq')"
        cursor.execute(stmt)
        n = cursor.fetchone()[0]
        return range(n * 16 - 15, n * 16 + 1)
class OracleOIDAllocator(object):
    implements(IOIDAllocator)

    def __init__(self, connmanager):
        self.connmanager = connmanager

    def set_min_oid(self, cursor, oid):
        """Ensure the next OID is at least the given OID."""
        n = (oid + 15) // 16
        stmt = "SELECT zoid_seq.nextval FROM DUAL"
        cursor.execute(stmt)
        next_n = cursor.fetchone()[0]
        if next_n < n:
            # Oracle provides no way to modify the sequence value
            # except through ALTER SEQUENCE or DROP/CREATE SEQUENCE,
            # but either statement kills the current transaction.
            # Therefore, open a temporary connection to make the
            # alteration.
            conn2, cursor2 = self.connmanager.open()
            try:
                # Change the sequence by altering the increment.
                # (This is safer than dropping and re-creating the
                # sequence.)
                diff = n - next_n
                cursor2.execute(
                    "ALTER SEQUENCE zoid_seq INCREMENT BY %d" % diff)
                cursor2.execute("SELECT zoid_seq.nextval FROM DUAL")
                cursor2.execute("ALTER SEQUENCE zoid_seq INCREMENT BY 1")
                conn2.commit()
            finally:
                self.connmanager.close(conn2, cursor2)

    @metricmethod
    def new_oids(self, cursor):
        """Return a sequence of new, unused OIDs."""
        stmt = "SELECT zoid_seq.nextval FROM DUAL"
        cursor.execute(stmt)
        n = cursor.fetchone()[0]
        return range(n * 16 - 15, n * 16 + 1)
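# Aside (illustrative only): a concrete walk-through of the ALTER SEQUENCE
# trick in set_min_oid() above.  Suppose next_n is 5 and the target n is
# 100: the increment is bumped to diff = 95 for exactly one NEXTVAL call,
# then restored to 1.  A second connection is needed because ALTER SEQUENCE
# is DDL and would implicitly commit the caller's transaction:
#
#   ALTER SEQUENCE zoid_seq INCREMENT BY 95;
#   SELECT zoid_seq.nextval FROM DUAL;   -- now returns 100
#   ALTER SEQUENCE zoid_seq INCREMENT BY 1;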
class PostgreSQLLocker(Locker):
    implements(ILocker)

    def __init__(self, options, lock_exceptions, version_detector):
        super(PostgreSQLLocker, self).__init__(
            options=options, lock_exceptions=lock_exceptions)
        self.version_detector = version_detector

    @metricmethod
    def hold_commit_lock(self, cursor, ensure_current=False, nowait=False):
        try:
            if ensure_current:
                # Hold commit_lock to prevent concurrent commits
                # (for as short a time as possible).
                # Lock transaction and current_object in share mode to ensure
                # conflict detection has the most current data.
                if self.keep_history:
                    stmt = """
                    LOCK TABLE commit_lock IN EXCLUSIVE MODE%s;
                    LOCK TABLE transaction IN SHARE MODE;
                    LOCK TABLE current_object IN SHARE MODE
                    """ % (nowait and ' NOWAIT' or '',)
                else:
                    stmt = """
                    LOCK TABLE commit_lock IN EXCLUSIVE MODE%s;
                    LOCK TABLE object_state IN SHARE MODE
                    """ % (nowait and ' NOWAIT' or '',)
                cursor.execute(stmt)
            else:
                cursor.execute("LOCK TABLE commit_lock IN EXCLUSIVE MODE%s" %
                    (nowait and ' NOWAIT' or '',))
        except self.lock_exceptions:
            if nowait:
                return False
            raise StorageError('Acquiring a commit lock failed')
        return True

    def release_commit_lock(self, cursor):
        # no action needed
        pass

    def _pg_has_advisory_locks(self, cursor):
        """Return true if this version of PostgreSQL supports advisory
        locks"""
        return self.version_detector.get_version(cursor) >= (8, 2)

    def create_pack_lock(self, cursor):
        if not self._pg_has_advisory_locks(cursor):
            cursor.execute("CREATE TABLE pack_lock ()")

    def hold_pack_lock(self, cursor):
        """Try to acquire the pack lock.

        Raise an exception if packing or undo is already in progress.
        """
        if self._pg_has_advisory_locks(cursor):
            cursor.execute("SELECT pg_try_advisory_lock(1)")
            locked = cursor.fetchone()[0]
            if not locked:
                raise StorageError('A pack or undo operation is in progress')
        else:
            # b/w compat
            try:
                cursor.execute("LOCK pack_lock IN EXCLUSIVE MODE NOWAIT")
            except self.lock_exceptions:  # psycopg2.DatabaseError
                raise StorageError('A pack or undo operation is in progress')

    def release_pack_lock(self, cursor):
        """Release the pack lock."""
        if self._pg_has_advisory_locks(cursor):
            cursor.execute("SELECT pg_advisory_unlock(1)")
class OracleLocker(Locker):
    implements(ILocker)

    def __init__(self, options, lock_exceptions, inputsize_NUMBER):
        super(OracleLocker, self).__init__(
            options=options, lock_exceptions=lock_exceptions)
        self.inputsize_NUMBER = inputsize_NUMBER

    @metricmethod
    def hold_commit_lock(self, cursor, ensure_current=False, nowait=False):
        # Hold commit_lock to prevent concurrent commits
        # (for as short a time as possible).
        timeout = not nowait and self.commit_lock_timeout or 0
        status = cursor.callfunc(
            "DBMS_LOCK.REQUEST",
            self.inputsize_NUMBER, (
                self.commit_lock_id,
                6,  # exclusive (X_MODE)
                timeout,
                True,
            ))
        if status != 0:
            if nowait and status == 1:
                return False  # Lock failed due to a timeout
            if status >= 1 and status <= 5:
                msg = ('', 'timeout', 'deadlock', 'parameter error',
                    'lock already owned', 'illegal handle')[int(status)]
            else:
                msg = str(status)
            raise StorageError("Unable to acquire commit lock (%s)" % msg)

        # Alternative:
        #cursor.execute("LOCK TABLE commit_lock IN EXCLUSIVE MODE")

        if ensure_current:
            if self.keep_history:
                # Lock transaction and current_object in share mode to ensure
                # conflict detection has the most current data.
                cursor.execute("LOCK TABLE transaction IN SHARE MODE")
                cursor.execute("LOCK TABLE current_object IN SHARE MODE")
            else:
                cursor.execute("LOCK TABLE object_state IN SHARE MODE")
        return True

    def release_commit_lock(self, cursor):
        # no action needed
        pass

    def hold_pack_lock(self, cursor):
        """Try to acquire the pack lock.

        Raise an exception if packing or undo is already in progress.
        """
        stmt = """
        LOCK TABLE pack_lock IN EXCLUSIVE MODE NOWAIT
        """
        try:
            cursor.execute(stmt)
        except self.lock_exceptions:  # cx_Oracle.DatabaseError
            raise StorageError('A pack or undo operation is in progress')

    def release_pack_lock(self, cursor):
        """Release the pack lock."""
        # No action needed
        pass
class ScriptRunner(object):
    implements(IScriptRunner)

    # script_vars contains replacements for parts of scripts.
    # These are correct for PostgreSQL and MySQL but not for Oracle.
    script_vars = {
        'TRUE':     'TRUE',
        'FALSE':    'FALSE',
        'TRUNCATE': 'TRUNCATE',
        'oid':      '%(oid)s',
        'tid':      '%(tid)s',
        'pack_tid': '%(pack_tid)s',
        'undo_tid': '%(undo_tid)s',
        'self_tid': '%(self_tid)s',
        'min_tid':  '%(min_tid)s',
        'max_tid':  '%(max_tid)s',
    }

    def run_script_stmt(self, cursor, generic_stmt, generic_params=()):
        """Execute a statement from a script with the given parameters.

        params should be either an empty tuple (no parameters) or a map.

        The input statement is generic and needs to be transformed into
        a database-specific statement.
        """
        stmt = generic_stmt % self.script_vars
        try:
            cursor.execute(stmt, generic_params)
        except:
            log.warning("script statement failed: %r; parameters: %r",
                stmt, generic_params)
            raise

    def run_script(self, cursor, script, params=()):
        """Execute a series of statements in the database.

        params should be either an empty tuple (no parameters) or a map.

        The statements are transformed by run_script_stmt
        before execution.
        """
        lines = []
        for line in script.split('\n'):
            line = line.strip()
            if not line or line.startswith('--'):
                continue
            if line.endswith(';'):
                line = line[:-1]
                lines.append(line)
                stmt = '\n'.join(lines)
                self.run_script_stmt(cursor, stmt, params)
                lines = []
            else:
                lines.append(line)
        if lines:
            stmt = '\n'.join(lines)
            self.run_script_stmt(cursor, stmt, params)

    def run_many(self, cursor, stmt, items):
        """Execute a statement repeatedly.  Items should be a list of tuples.

        stmt should use '%s' parameter format.
        """
        cursor.executemany(stmt, items)
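# Aside (illustrative only): how ScriptRunner.script_vars rewrites a generic
# statement.  For PostgreSQL and MySQL each placeholder maps to itself, so
# the %(tid)s marker survives the substitution and is later bound by the
# driver; an Oracle runner would override script_vars to emit :tid instead.
# The commented-out call is a hypothetical usage with a live cursor.
_generic = "SELECT zoid FROM object_state WHERE tid = %(tid)s"
assert _generic % ScriptRunner.script_vars == _generic
# ScriptRunner().run_script_stmt(cursor, _generic, {'tid': 1234})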
class OracleSchemaInstaller(AbstractSchemaInstaller):
    implements(ISchemaInstaller)

    database_type = 'oracle'

    def get_database_name(self, cursor):
        cursor.execute("SELECT ora_database_name FROM DUAL")
        for (name,) in cursor:
            return name

    def prepare(self):
        """Create the database schema if it does not already exist."""
        def callback(conn, cursor):
            tables = self.list_tables(cursor)
            if 'object_state' not in tables:
                self.create(cursor)
            else:
                self.check_compatibility(cursor, tables)
                self.update_schema(cursor, tables)
            packages = self.list_packages(cursor)
            package_name = 'relstorage_op'
            if packages.get(package_name) != oracle_package_version:
                self.install_package(cursor)
                packages = self.list_packages(cursor)
                if packages.get(package_name) != oracle_package_version:
                    raise AssertionError(
                        "Could not get version information after "
                        "installing the %s package." % package_name)
        self.connmanager.open_and_call(callback)

    def install_package(self, cursor):
        """Install the package containing stored procedures"""
        if self.keep_history:
            code = oracle_history_preserving_package
        else:
            code = oracle_history_free_package
        for stmt in code.split('\n/\n'):
            if stmt.strip():
                cursor.execute(stmt)

    def list_tables(self, cursor):
        cursor.execute("SELECT table_name FROM user_tables")
        return [name.lower() for (name,) in cursor.fetchall()]

    def list_sequences(self, cursor):
        cursor.execute("SELECT sequence_name FROM user_sequences")
        return [name.lower() for (name,) in cursor.fetchall()]

    def list_packages(self, cursor):
        """List installed stored procedure packages.

        Returns {package name: version}.  version may be None.
        """
        stmt = """
        SELECT object_name
        FROM user_objects
        WHERE object_type = 'PACKAGE'
        """
        cursor.execute(stmt)
        names = [name for (name,) in cursor.fetchall()]

        res = {}
        for name in names:
            version = None
            stmt = """
            SELECT TEXT FROM USER_SOURCE
            WHERE TYPE='PACKAGE BODY'
                AND NAME=:1
            """
            cursor.execute(stmt, (name,))
            for (text,) in cursor:
                match = re.search(r'Version:\s*([0-9a-zA-Z.]+)', text)
                if match is not None:
                    version = match.group(1)
                    break
            res[name.lower()] = version
        return res
class HistoryFreePackUndo(PackUndo):
    implements(IPackUndo)

    keep_history = False

    _script_choose_pack_transaction = """
        SELECT tid
        FROM object_state
        WHERE tid > 0
            AND tid <= %(tid)s
        ORDER BY tid DESC
        LIMIT 1
        """

    _script_create_temp_pack_visit = """
        CREATE TEMPORARY TABLE temp_pack_visit (
            zoid BIGINT NOT NULL,
            keep_tid BIGINT NOT NULL
        );
        CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
        CREATE INDEX temp_pack_keep_tid ON temp_pack_visit (keep_tid)
        """

    def verify_undoable(self, cursor, undo_tid):
        """Raise UndoError if it is not safe to undo the specified txn."""
        raise UndoError("Undo is not supported by this storage")

    def undo(self, cursor, undo_tid, self_tid):
        """Undo a transaction.

        Parameters: "undo_tid", the integer tid of the transaction to undo,
        and "self_tid", the integer tid of the current transaction.

        Returns the list of OIDs undone.
        """
        raise UndoError("Undo is not supported by this storage")

    def on_filling_object_refs(self):
        """Test injection point"""

    def fill_object_refs(self, conn, cursor, get_references):
        """Update the object_refs table by analyzing new object states.

        Note that ZODB connections can change the object states while this
        method is running, possibly obscuring object references, so this
        method runs repeatedly until it detects no changes between two
        passes.
        """
        holding_commit = False
        attempt = 0
        while True:
            attempt += 1
            if attempt >= 3 and not holding_commit:
                # Starting with the third attempt, hold the commit lock
                # to prevent changes.
                holding_commit = True
                self.locker.hold_commit_lock(cursor)

            stmt = """
            SELECT object_state.zoid FROM object_state
                LEFT JOIN object_refs_added
                    ON (object_state.zoid = object_refs_added.zoid)
            WHERE object_refs_added.tid IS NULL
                OR object_refs_added.tid != object_state.tid
            ORDER BY object_state.zoid
            """
            self.runner.run_script_stmt(cursor, stmt)
            oids = [oid for (oid,) in fetchmany(cursor)]
            log_at = time.time() + 60
            if oids:
                if attempt == 1:
                    self.on_filling_object_refs()
                oid_count = len(oids)
                oids_done = 0
                log.info("analyzing references from %d object(s)", oid_count)
                while oids:
                    batch = oids[:100]
                    oids = oids[100:]
                    self._add_refs_for_oids(cursor, batch, get_references)
                    oids_done += len(batch)
                    now = time.time()
                    if now >= log_at:
                        # Save the work done so far.
                        conn.commit()
                        log_at = now + 60
                        log.info("objects analyzed: %d/%d",
                            oids_done, oid_count)
                conn.commit()
                log.info("objects analyzed: %d/%d", oids_done, oid_count)
            else:
                # No changes since last pass.
                break

    def _add_refs_for_oids(self, cursor, oids, get_references):
        """Fill object_refs with the states for some objects.

        Returns the number of references added.
        """
        oid_list = ','.join(str(oid) for oid in oids)
        use_base64 = (self.database_type == 'postgresql')

        if use_base64:
            stmt = """
            SELECT zoid, tid, encode(state, 'base64')
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        else:
            stmt = """
            SELECT zoid, tid, state
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        add_objects = []
        add_refs = []
        for from_oid, tid, state in fetchmany(cursor):
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            add_objects.append((from_oid, tid))
            if state:
                state = decode_bytes_param(state, use_base64)
                try:
                    to_oids = get_references(state)
                except:
                    log.error(
                        "pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d" % (
                            from_oid, tid, len(state)))
                    raise
                for to_oid in to_oids:
                    add_refs.append((from_oid, tid, to_oid))

        if not add_objects:
            return 0

        stmt = "DELETE FROM object_refs_added WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)
        stmt = "DELETE FROM object_ref WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid)
        VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_refs)

        stmt = """
        INSERT INTO object_refs_added (zoid, tid)
        VALUES (%s, %s)
        """
        self.runner.run_many(cursor, stmt, add_objects)

        return len(add_refs)

    @metricmethod
    def pre_pack(self, pack_tid, get_references):
        """Decide what the garbage collector should delete.

        Objects created or modified after pack_tid will not be
        garbage collected.

        get_references is a function that accepts a pickled state
        and returns a set of OIDs that state refers to.

        The self.options.pack_gc flag indicates whether to run garbage
        collection.  If pack_gc is false, this method does nothing.
        """
        if not self.options.pack_gc:
            log.warning("pre_pack: garbage collection is disabled on a "
                "history-free storage, so doing nothing")
            return

        conn, cursor = self.connmanager.open_for_pre_pack()
        try:
            try:
                self._pre_pack_main(conn, cursor, pack_tid, get_references)
            except:
                log.exception("pre_pack: failed")
                conn.rollback()
                raise
            else:
                conn.commit()
                log.info("pre_pack: finished successfully")
        finally:
            self.connmanager.close(conn, cursor)

    def _pre_pack_main(self, conn, cursor, pack_tid, get_references):
        """Determine what to garbage collect."""
        stmt = self._script_create_temp_pack_visit
        if stmt:
            self.runner.run_script(cursor, stmt)

        self.fill_object_refs(conn, cursor, get_references)

        log.info("pre_pack: filling the pack_object table")
        # Fill the pack_object table with all known OIDs.
        stmt = """
        %(TRUNCATE)s pack_object;

        INSERT INTO pack_object (zoid, keep, keep_tid)
        SELECT zoid, %(FALSE)s, tid
        FROM object_state;

        -- Keep the root object.
        UPDATE pack_object SET keep = %(TRUE)s
        WHERE zoid = 0;

        -- Keep objects that have been revised since pack_tid.
        UPDATE pack_object SET keep = %(TRUE)s
        WHERE keep_tid > %(pack_tid)s;
        """
        self.runner.run_script(cursor, stmt, {'pack_tid': pack_tid})

        # Traverse the graph, setting the 'keep' flags in pack_object
        self._traverse_graph(cursor)

    def _find_pack_tid(self):
        """If pack was not completed, find our pack tid again"""
        # pack (below) ignores its pack_tid argument, so we can safely
        # return None here
        return None

    @metricmethod
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Run garbage collection.

        Requires the information provided by pre_pack.
        """
        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT zoid, keep_tid
                FROM pack_object
                WHERE keep = %(FALSE)s
                """
                self.runner.run_script_stmt(cursor, stmt)
                to_remove = list(fetchmany(cursor))

                total = len(to_remove)
                log.info("pack: will remove %d object(s)", total)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are
                # able to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                # We'll report on progress in at most .1% step increments
                lastreport, reportstep = 0, max(total / 1000, 1)

                self._pause_pack_until_lock(cursor, sleep)
                while to_remove:
                    items = to_remove[:100]
                    del to_remove[:100]
                    stmt = """
                    DELETE FROM object_state
                    WHERE zoid = %s AND tid = %s
                    """
                    self.runner.run_many(cursor, stmt, items)
                    packed_list.extend(items)

                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        del packed_list[:]
                        counter = total - len(to_remove)
                        if counter >= lastreport + reportstep:
                            log.info("pack: removed %d (%.1f%%) state(s)",
                                counter, counter / float(total) * 100)
                            lastreport = counter / reportstep * reportstep
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()

                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor)

            except:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()

        finally:
            self.connmanager.close(conn, cursor)

    def _pack_cleanup(self, conn, cursor):
        # commit the work done so far
        conn.commit()
        self.locker.release_commit_lock(cursor)
        log.info("pack: cleaning up")

        # This section does not need to hold the commit lock, as it only
        # touches pack-specific tables.  We already hold a pack lock for
        # that.
        stmt = """
        DELETE FROM object_refs_added
        WHERE zoid IN (
            SELECT zoid
            FROM pack_object
            WHERE keep = %(FALSE)s
        );

        DELETE FROM object_ref
        WHERE zoid IN (
            SELECT zoid
            FROM pack_object
            WHERE keep = %(FALSE)s
        );

        %(TRUNCATE)s pack_object
        """
        self.runner.run_script(cursor, stmt)
class OracleTransactionControl(TransactionControl):
    implements(ITransactionControl)

    def __init__(self, keep_history, Binary, twophase):
        self.keep_history = keep_history
        self.Binary = Binary
        self.twophase = twophase

    def commit_phase1(self, conn, cursor, tid):
        """Begin a commit.  Returns the transaction name.

        The transaction name must not be None.

        This method should guarantee that commit_phase2() will succeed,
        meaning that if commit_phase2() would raise any error, the error
        should be raised in commit_phase1() instead.
        """
        if self.twophase:
            conn.prepare()
        return '-'

    def get_tid(self, cursor):
        """Returns the most recent tid."""
        if self.keep_history:
            stmt = """
            SELECT MAX(tid)
            FROM transaction
            """
            cursor.execute(stmt)
            rows = list(cursor)
        else:
            stmt = """
            SELECT MAX(tid)
            FROM object_state
            """
            cursor.execute(stmt)
            rows = list(cursor)
        if not rows:
            # nothing has been stored yet
            return 0
        assert len(rows) == 1
        tid = rows[0][0]
        if tid is None:
            tid = 0
        return tid

    def add_transaction(self, cursor, tid, username, description, extension,
            packed=False):
        """Add a transaction."""
        if self.keep_history:
            stmt = """
            INSERT INTO transaction
                (tid, packed, username, description, extension)
            VALUES (:1, :2, :3, :4, :5)
            """
            max_desc_len = 2000
            if len(description) > max_desc_len:
                log.warning('Trimming description of transaction %s '
                    'to %d characters', tid, max_desc_len)
                description = description[:max_desc_len]
            cursor.execute(stmt, (
                tid, packed and 'Y' or 'N', self.Binary(username),
                self.Binary(description), self.Binary(extension)))
class PostgreSQLSchemaInstaller(AbstractSchemaInstaller):
    implements(ISchemaInstaller)

    database_type = 'postgresql'

    def __init__(self, connmanager, runner, locker, keep_history):
        super(PostgreSQLSchemaInstaller, self).__init__(
            connmanager, runner, keep_history)
        self.locker = locker

    def get_database_name(self, cursor):
        cursor.execute("SELECT current_database()")
        for (name,) in cursor:
            return name

    def prepare(self):
        """Create the database schema if it does not already exist."""
        def callback(conn, cursor):
            tables = self.list_tables(cursor)
            if 'object_state' not in tables:
                self.create(cursor)
            else:
                self.check_compatibility(cursor, tables)
                self.update_schema(cursor, tables)
            if not self.all_procedures_installed(cursor):
                self.install_procedures(cursor)
                if not self.all_procedures_installed(cursor):
                    raise AssertionError(
                        "Could not get version information after "
                        "installing the stored procedures.")
            triggers = self.list_triggers(cursor)
            if 'blob_chunk_delete' not in triggers:
                self.install_triggers(cursor)
        self.connmanager.open_and_call(callback)

    def create(self, cursor):
        """Create the database tables."""
        super(PostgreSQLSchemaInstaller, self).create(cursor)
        # Create the pack_lock table only on PostgreSQL 8.1 (not 8.2+)
        self.locker.create_pack_lock(cursor)

    def list_tables(self, cursor):
        cursor.execute("SELECT tablename FROM pg_tables")
        return [name for (name,) in cursor.fetchall()]

    def list_sequences(self, cursor):
        cursor.execute("SELECT relname FROM pg_class WHERE relkind = 'S'")
        return [name for (name,) in cursor.fetchall()]

    def list_languages(self, cursor):
        cursor.execute("SELECT lanname FROM pg_catalog.pg_language")
        return [name for (name,) in cursor.fetchall()]

    def install_languages(self, cursor):
        if 'plpgsql' not in self.list_languages(cursor):
            cursor.execute("CREATE LANGUAGE plpgsql")

    def list_procedures(self, cursor):
        """Returns {procedure name: version}.  version may be None."""
        stmt = """
        SELECT proname, prosrc
        FROM pg_catalog.pg_namespace n
        JOIN pg_catalog.pg_proc p ON pronamespace = n.oid
        JOIN pg_catalog.pg_type t ON prorettype = t.oid
        WHERE nspname = 'public'
        """
        cursor.execute(stmt)
        res = {}
        for (name, text) in cursor.fetchall():
            version = None
            match = re.search(r'Version:\s*([0-9a-zA-Z.]+)', text)
            if match is not None:
                version = match.group(1)
            res[name.lower()] = version
        return res

    def all_procedures_installed(self, cursor):
        """Check whether all required stored procedures are installed.

        Returns True only if all required procedures are installed and
        up to date.
        """
        expect = [
            'blob_chunk_delete_trigger',
            'temp_blob_chunk_delete_trigger',
        ]
        current_procs = self.list_procedures(cursor)
        for proc in expect:
            if current_procs.get(proc) != postgresql_proc_version:
                return False
        return True

    def install_procedures(self, cursor):
        """Install the stored procedures"""
        self.install_languages(cursor)
        cursor.execute(postgresql_procedures)

    def list_triggers(self, cursor):
        cursor.execute("SELECT tgname FROM pg_trigger")
        return [name for (name,) in cursor]

    def install_triggers(self, cursor):
        stmt = """
        CREATE TRIGGER blob_chunk_delete
            BEFORE DELETE ON blob_chunk
            FOR EACH ROW
            EXECUTE PROCEDURE blob_chunk_delete_trigger()
        """
        cursor.execute(stmt)

    def drop_all(self):
        def callback(conn, cursor):
            if 'blob_chunk' in self.list_tables(cursor):
                # Trigger deletion of blob OIDs.
                cursor.execute("DELETE FROM blob_chunk")
        self.connmanager.open_and_call(callback)
        super(PostgreSQLSchemaInstaller, self).drop_all()
class ReplicaSelector(object):
    implements(IReplicaSelector)

    def __init__(self, fn, replica_timeout):
        self.replica_conf = fn
        self.replica_timeout = replica_timeout
        self._read_config()
        self._select(0)
        self._iterating = False
        self._skip_index = None

    def _read_config(self):
        self._config_modified = os.path.getmtime(self.replica_conf)
        self._config_checked = time.time()
        f = open(self.replica_conf, 'r')
        try:
            lines = f.readlines()
        finally:
            f.close()
        replicas = []
        for line in lines:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            replicas.append(line)
        if not replicas:
            raise IndexError(
                "No replicas specified in %s" % self.replica_conf)
        self._replicas = replicas

    def _is_config_modified(self):
        now = time.time()
        if now < self._config_checked + 1:
            # don't check the last mod time more often than once per second
            return False
        self._config_checked = now
        t = os.path.getmtime(self.replica_conf)
        return t != self._config_modified

    def _select(self, index):
        self._current_replica = self._replicas[index]
        self._current_index = index
        if index > 0 and self.replica_timeout:
            self._expiration = time.time() + self.replica_timeout
        else:
            self._expiration = None

    def current(self):
        """Get the current replica."""
        self._iterating = False
        if self._is_config_modified():
            self._read_config()
            self._select(0)
        elif self._expiration is not None and time.time() >= self._expiration:
            self._select(0)
        return self._current_replica

    @metricmethod
    def next(self):
        """Return the next replica to try.

        Return None if there are no more replicas defined.
        """
        if self._is_config_modified():
            # Start over even if iteration was already in progress.
            self._read_config()
            self._select(0)
            self._skip_index = None
            self._iterating = True
        elif not self._iterating:
            # Start iterating.
            self._skip_index = self._current_index
            i = 0
            if i == self._skip_index:
                i = 1
            if i >= len(self._replicas):
                # There are no more replicas to try.
                self._select(0)
                return None
            self._select(i)
            self._iterating = True
        else:
            # Continue iterating.
            i = self._current_index + 1
            if i == self._skip_index:
                i += 1
            if i >= len(self._replicas):
                # There are no more replicas to try.
                self._select(0)
                return None
            self._select(i)
        return self._current_replica
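# Aside (hypothetical contents, for illustration): the file format consumed
# by ReplicaSelector._read_config() above -- one replica per line, with
# blank lines and '#' comments skipped.  The first entry is tried first.
_EXAMPLE_REPLICA_CONF = """\
# replicas.conf
primary.example.com:5432
replica1.example.com:5432
"""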
class HistoryPreservingPackUndo(PackUndo):
    implements(IPackUndo)

    keep_history = True

    _script_choose_pack_transaction = """
        SELECT tid
        FROM transaction
        WHERE tid > 0
            AND tid <= %(tid)s
            AND packed = FALSE
        ORDER BY tid DESC
        LIMIT 1
        """

    _script_create_temp_pack_visit = """
        CREATE TEMPORARY TABLE temp_pack_visit (
            zoid BIGINT NOT NULL,
            keep_tid BIGINT NOT NULL
        );
        CREATE UNIQUE INDEX temp_pack_visit_zoid ON temp_pack_visit (zoid);
        CREATE INDEX temp_pack_keep_tid ON temp_pack_visit (keep_tid)
        """

    _script_create_temp_undo = """
        CREATE TEMPORARY TABLE temp_undo (
            zoid BIGINT NOT NULL,
            prev_tid BIGINT NOT NULL
        );
        CREATE UNIQUE INDEX temp_undo_zoid ON temp_undo (zoid)
        """

    _script_reset_temp_undo = "DROP TABLE temp_undo"

    _script_find_pack_tid = """
        SELECT keep_tid
        FROM pack_object
        ORDER BY keep_tid DESC
        LIMIT 1
        """

    _script_transaction_has_data = """
        SELECT tid
        FROM object_state
        WHERE tid = %(tid)s
        LIMIT 1
        """

    _script_pack_current_object = """
        DELETE FROM current_object
        WHERE tid = %(tid)s
            AND zoid in (
                SELECT pack_state.zoid
                FROM pack_state
                WHERE pack_state.tid = %(tid)s
            )
        """

    _script_pack_object_state = """
        DELETE FROM object_state
        WHERE tid = %(tid)s
            AND zoid in (
                SELECT pack_state.zoid
                FROM pack_state
                WHERE pack_state.tid = %(tid)s
            )
        """

    _script_pack_object_ref = """
        DELETE FROM object_refs_added
        WHERE tid IN (
            SELECT tid
            FROM transaction
            WHERE empty = %(TRUE)s
        );

        DELETE FROM object_ref
        WHERE tid IN (
            SELECT tid
            FROM transaction
            WHERE empty = %(TRUE)s
        )
        """

    # See http://www.postgres.cz/index.php/PostgreSQL_SQL_Tricks#Fast_first_n_rows_removing
    # for the = any(array(...)) rationale.
    _script_delete_empty_transactions_batch = """
        DELETE FROM transaction
        WHERE tid = any(array(
            SELECT tid FROM transaction
            WHERE packed = %(TRUE)s
                AND empty = %(TRUE)s
            LIMIT 1000
        ))
        """

    @metricmethod
    def verify_undoable(self, cursor, undo_tid):
        """Raise UndoError if it is not safe to undo the specified txn."""
        stmt = """
        SELECT 1 FROM transaction
        WHERE tid = %(undo_tid)s
            AND packed = %(FALSE)s
        """
        self.runner.run_script_stmt(cursor, stmt, {'undo_tid': undo_tid})
        if not cursor.fetchall():
            raise UndoError("Transaction not found or packed")

        # Rule: we can undo an object if the object's state in the
        # transaction to undo matches the object's current state.
        # If any object in the transaction does not fit that rule,
        # refuse to undo.
        # (Note that this prevents conflict-resolving undo as described
        # by ZODB.tests.ConflictResolution.
        # ConflictResolvingTransUndoStorage.  Do people need that?
        # If so, we can probably support it, but it will require
        # additional code.)
        stmt = """
        SELECT prev_os.zoid, current_object.tid
        FROM object_state prev_os
            JOIN object_state cur_os
                ON (prev_os.zoid = cur_os.zoid)
            JOIN current_object
                ON (cur_os.zoid = current_object.zoid
                    AND cur_os.tid = current_object.tid)
        WHERE prev_os.tid = %(undo_tid)s
            AND cur_os.md5 != prev_os.md5
        """
        self.runner.run_script_stmt(cursor, stmt, {'undo_tid': undo_tid})
        if cursor.fetchmany():
            raise UndoError(
                "Some data were modified by a later transaction")

        # Rule: don't allow the creation of the root object to
        # be undone.  It's hard to get it back.
        stmt = """
        SELECT 1
        FROM object_state
        WHERE tid = %(undo_tid)s
            AND zoid = 0
            AND prev_tid = 0
        """
        self.runner.run_script_stmt(cursor, stmt, {'undo_tid': undo_tid})
        if cursor.fetchall():
            raise UndoError("Can't undo the creation of the root object")

    @metricmethod
    def undo(self, cursor, undo_tid, self_tid):
        """Undo a transaction.

        Parameters: "undo_tid", the integer tid of the transaction to undo,
        and "self_tid", the integer tid of the current transaction.

        Returns the states copied forward by the undo operation as a
        list of (oid, old_tid).
        """
        stmt = self._script_create_temp_undo
        if stmt:
            self.runner.run_script(cursor, stmt)

        stmt = """
        DELETE FROM temp_undo;

        -- Put into temp_undo the list of objects to be undone and
        -- the tid of the transaction that has the undone state.
        INSERT INTO temp_undo (zoid, prev_tid)
        SELECT zoid, prev_tid
        FROM object_state
        WHERE tid = %(undo_tid)s;

        -- Override previous undo operations within this transaction
        -- by resetting the current_object pointer and deleting
        -- copied states from object_state.
        UPDATE current_object
        SET tid = (
                SELECT prev_tid
                FROM object_state
                WHERE zoid = current_object.zoid
                    AND tid = %(self_tid)s
            )
        WHERE zoid IN (SELECT zoid FROM temp_undo)
            AND tid = %(self_tid)s;

        DELETE FROM object_state
        WHERE zoid IN (SELECT zoid FROM temp_undo)
            AND tid = %(self_tid)s;

        -- Copy old states forward.
        INSERT INTO object_state (zoid, tid, prev_tid, md5, state_size, state)
        SELECT temp_undo.zoid, %(self_tid)s, current_object.tid,
            md5, COALESCE(state_size, 0), state
        FROM temp_undo
            JOIN current_object ON (temp_undo.zoid = current_object.zoid)
            LEFT JOIN object_state
                ON (object_state.zoid = temp_undo.zoid
                    AND object_state.tid = temp_undo.prev_tid);

        -- Copy old blob chunks forward.
        INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
        SELECT temp_undo.zoid, %(self_tid)s, chunk_num, chunk
        FROM temp_undo
            JOIN blob_chunk
                ON (blob_chunk.zoid = temp_undo.zoid
                    AND blob_chunk.tid = temp_undo.prev_tid);

        -- List the copied states.
        SELECT zoid, prev_tid FROM temp_undo
        """
        self.runner.run_script(cursor, stmt,
            {'undo_tid': undo_tid, 'self_tid': self_tid})
        res = list(cursor)

        stmt = self._script_reset_temp_undo
        if stmt:
            self.runner.run_script(cursor, stmt)

        return res

    def on_filling_object_refs(self):
        """Test injection point"""

    def fill_object_refs(self, conn, cursor, get_references):
        """Update the object_refs table by analyzing new transactions."""
        stmt = """
        SELECT transaction.tid
        FROM transaction
            LEFT JOIN object_refs_added
                ON (transaction.tid = object_refs_added.tid)
        WHERE object_refs_added.tid IS NULL
        ORDER BY transaction.tid
        """
        self.runner.run_script_stmt(cursor, stmt)
        tids = [tid for (tid,) in cursor]
        log_at = time.time() + 60
        if tids:
            self.on_filling_object_refs()
            tid_count = len(tids)
            txns_done = 0
            log.info(
                "analyzing references from objects in %d new "
                "transaction(s)", tid_count)
            for tid in tids:
                self._add_refs_for_tid(cursor, tid, get_references)
                txns_done += 1
                now = time.time()
                if now >= log_at:
                    # save the work done so far
                    conn.commit()
                    log_at = now + 60
                    log.info("transactions analyzed: %d/%d",
                        txns_done, tid_count)
            conn.commit()
            log.info("transactions analyzed: %d/%d", txns_done, tid_count)

    def _add_refs_for_tid(self, cursor, tid, get_references):
        """Fill object_refs with all states for a transaction.

        Returns the number of references added.
        """
        log.debug("pre_pack: transaction %d: computing references ", tid)
        from_count = 0
        use_base64 = (self.database_type == 'postgresql')

        if use_base64:
            stmt = """
            SELECT zoid, encode(state, 'base64')
            FROM object_state
            WHERE tid = %(tid)s
            """
        else:
            stmt = """
            SELECT zoid, state
            FROM object_state
            WHERE tid = %(tid)s
            """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        add_rows = []  # [(from_oid, tid, to_oid)]
        for from_oid, state in fetchmany(cursor):
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            if state:
                state = decode_bytes_param(state, use_base64)
                from_count += 1
                try:
                    to_oids = get_references(state)
                except:
                    log.error(
                        "pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d" % (
                            from_oid, tid, len(state)))
                    raise
                for to_oid in to_oids:
                    add_rows.append((from_oid, tid, to_oid))

        # A previous pre-pack may have been interrupted.  Delete rows
        # from the interrupted attempt.
        stmt = "DELETE FROM object_ref WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        # Add the new references.
        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid)
        VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_rows)

        # The references have been computed for this transaction.
        stmt = """
        INSERT INTO object_refs_added (tid)
        VALUES (%(tid)s)
        """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        to_count = len(add_rows)
        log.debug(
            "pre_pack: transaction %d: has %d reference(s) "
            "from %d object(s)", tid, to_count, from_count)
        return to_count

    @metricmethod
    def pre_pack(self, pack_tid, get_references):
        """Decide what to pack.

        pack_tid specifies the most recent transaction to pack.

        get_references is a function that accepts a pickled state
        and returns a set of OIDs that state refers to.

        The self.options.pack_gc flag indicates whether to run garbage
        collection.  If pack_gc is false, at least one revision of every
        object is kept, even if nothing refers to it.  Packing with
        pack_gc disabled can be much faster.
        """
        conn, cursor = self.connmanager.open_for_pre_pack()
        try:
            try:
                if self.options.pack_gc:
                    log.info("pre_pack: start with gc enabled")
                    self._pre_pack_with_gc(
                        conn, cursor, pack_tid, get_references)
                else:
                    log.info("pre_pack: start without gc")
                    self._pre_pack_without_gc(conn, cursor, pack_tid)
                conn.commit()

                log.info("pre_pack: enumerating states to pack")
                stmt = "%(TRUNCATE)s pack_state"
                self.runner.run_script_stmt(cursor, stmt)
                to_remove = 0

                if self.options.pack_gc:
                    # Pack objects with the keep flag set to false.
                    stmt = """
                    INSERT INTO pack_state (tid, zoid)
                    SELECT tid, zoid
                    FROM object_state
                        JOIN pack_object USING (zoid)
                    WHERE keep = %(FALSE)s
                        AND tid > 0
                        AND tid <= %(pack_tid)s
                    """
                    self.runner.run_script_stmt(
                        cursor, stmt, {'pack_tid': pack_tid})
                    to_remove += cursor.rowcount

                # Pack object states with the keep flag set to true.
                stmt = """
                INSERT INTO pack_state (tid, zoid)
                SELECT tid, zoid
                FROM object_state
                    JOIN pack_object USING (zoid)
                WHERE keep = %(TRUE)s
                    AND tid > 0
                    AND tid != keep_tid
                    AND tid <= %(pack_tid)s
                """
                self.runner.run_script_stmt(
                    cursor, stmt, {'pack_tid': pack_tid})
                to_remove += cursor.rowcount

                log.info("pre_pack: enumerating transactions to pack")
                stmt = "%(TRUNCATE)s pack_state_tid"
                self.runner.run_script_stmt(cursor, stmt)
                stmt = """
                INSERT INTO pack_state_tid (tid)
                SELECT DISTINCT tid
                FROM pack_state
                """
                cursor.execute(stmt)

                log.info("pre_pack: will remove %d object state(s)",
                    to_remove)

            except:
                log.exception("pre_pack: failed")
                conn.rollback()
                raise
            else:
                log.info("pre_pack: finished successfully")
                conn.commit()
        finally:
            self.connmanager.close(conn, cursor)

    def _pre_pack_without_gc(self, conn, cursor, pack_tid):
        """Determine what to pack, without garbage collection.

        With garbage collection disabled, there is no need to follow
        object references.
        """
        # Fill the pack_object table with OIDs, but configure them
        # all to be kept by setting keep to true.
        log.debug("pre_pack: populating pack_object")
        stmt = """
        %(TRUNCATE)s pack_object;

        INSERT INTO pack_object (zoid, keep, keep_tid)
        SELECT zoid, %(TRUE)s, MAX(tid)
        FROM object_state
        WHERE tid > 0
            AND tid <= %(pack_tid)s
        GROUP BY zoid
        """
        self.runner.run_script(cursor, stmt, {'pack_tid': pack_tid})

    def _pre_pack_with_gc(self, conn, cursor, pack_tid, get_references):
        """Determine what to pack, with garbage collection."""
        stmt = self._script_create_temp_pack_visit
        if stmt:
            self.runner.run_script(cursor, stmt)

        self.fill_object_refs(conn, cursor, get_references)

        log.info("pre_pack: filling the pack_object table")
        # Fill the pack_object table with OIDs that either will be
        # removed (if nothing references the OID) or whose history will
        # be cut.
        stmt = """
        %(TRUNCATE)s pack_object;

        INSERT INTO pack_object (zoid, keep, keep_tid)
        SELECT zoid, %(FALSE)s, MAX(tid)
        FROM object_state
        WHERE tid > 0
            AND tid <= %(pack_tid)s
        GROUP BY zoid;

        -- Keep the root object.
        UPDATE pack_object SET keep = %(TRUE)s
        WHERE zoid = 0;

        -- Keep objects that have been revised since pack_tid.
        -- Use temp_pack_visit for temporary state; otherwise MySQL 5 chokes.
        INSERT INTO temp_pack_visit (zoid, keep_tid)
        SELECT zoid, 0
        FROM current_object
        WHERE tid > %(pack_tid)s;

        UPDATE pack_object SET keep = %(TRUE)s
        WHERE zoid IN (
            SELECT zoid
            FROM temp_pack_visit
        );

        %(TRUNCATE)s temp_pack_visit;

        -- Keep objects that are still referenced by object states in
        -- transactions that will not be packed.
        -- Use temp_pack_visit for temporary state; otherwise MySQL 5 chokes.
        INSERT INTO temp_pack_visit (zoid, keep_tid)
        SELECT DISTINCT to_zoid, 0
        FROM object_ref
        WHERE tid > %(pack_tid)s;

        UPDATE pack_object SET keep = %(TRUE)s
        WHERE zoid IN (
            SELECT zoid
            FROM temp_pack_visit
        );

        %(TRUNCATE)s temp_pack_visit;
        """
        self.runner.run_script(cursor, stmt, {'pack_tid': pack_tid})

        # Traverse the graph, setting the 'keep' flags in pack_object
        self._traverse_graph(cursor)

    def _find_pack_tid(self):
        """If pack was not completed, find our pack tid again"""
        conn, cursor = self.connmanager.open_for_pre_pack()
        try:
            stmt = self._script_find_pack_tid
            self.runner.run_script_stmt(cursor, stmt)
            res = [tid for (tid,) in cursor]
        finally:
            self.connmanager.close(conn, cursor)
        return res and res[0] or 0

    @metricmethod
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Pack.  Requires the information provided by pre_pack."""
        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT transaction.tid,
                    CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
                    CASE WHEN pack_state_tid.tid IS NOT NULL THEN 1 ELSE 0 END
                FROM transaction
                    LEFT JOIN pack_state_tid ON (
                        transaction.tid = pack_state_tid.tid)
                WHERE transaction.tid > 0
                    AND transaction.tid <= %(pack_tid)s
                    AND (packed = %(FALSE)s OR pack_state_tid.tid IS NOT NULL)
                """
                self.runner.run_script_stmt(
                    cursor, stmt, {'pack_tid': pack_tid})
                tid_rows = list(fetchmany(cursor))
                tid_rows.sort()  # oldest first

                total = len(tid_rows)
                log.info("pack: will pack %d transaction(s)", total)

                stmt = self._script_create_temp_pack_visit
                if stmt:
                    self.runner.run_script(cursor, stmt)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are
                # able to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                counter, lastreport, statecounter = 0, 0, 0
                # We'll report on progress in at most .1% step increments
                reportstep = max(total / 1000, 1)

                self._pause_pack_until_lock(cursor, sleep)
                for tid, packed, has_removable in tid_rows:
                    self._pack_transaction(
                        cursor, pack_tid, tid, packed, has_removable,
                        packed_list)
                    counter += 1
                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        statecounter += len(packed_list)
                        if counter >= lastreport + reportstep:
                            log.info(
                                "pack: packed %d (%.1f%%) transaction(s), "
                                "affecting %d states",
                                counter, counter / float(total) * 100,
                                statecounter)
                            lastreport = counter / reportstep * reportstep
                        del packed_list[:]
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()
                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor, sleep)

            except:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()
        finally:
            self.connmanager.close(conn, cursor)

    def _pack_transaction(self, cursor, pack_tid, tid, packed,
            has_removable, packed_list):
        """Pack one transaction.  Requires populated pack tables."""
        log.debug("pack: transaction %d: packing", tid)
        removed_objects = 0
        removed_states = 0

        if has_removable:
            stmt = self._script_pack_current_object
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_objects = cursor.rowcount

            stmt = self._script_pack_object_state
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_states = cursor.rowcount

            # Terminate prev_tid chains
            stmt = """
            UPDATE object_state SET prev_tid = 0
            WHERE prev_tid = %(tid)s
                AND tid <= %(pack_tid)s
            """
            self.runner.run_script_stmt(cursor, stmt,
                {'pack_tid': pack_tid, 'tid': tid})

            stmt = """
            SELECT pack_state.zoid
            FROM pack_state
            WHERE pack_state.tid = %(tid)s
            """
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            for (oid,) in fetchmany(cursor):
                packed_list.append((oid, tid))

        # Find out whether the transaction is empty
        stmt = self._script_transaction_has_data
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        empty = not list(cursor)

        # mark the transaction packed and possibly empty
        if empty:
            clause = 'empty = %(TRUE)s'
            state = 'empty'
        else:
            clause = 'empty = %(FALSE)s'
            state = 'not empty'
        stmt = "UPDATE transaction SET packed = %(TRUE)s, " + clause
        stmt += " WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        log.debug(
            "pack: transaction %d (%s): removed %d object(s) and %d state(s)",
            tid, state, removed_objects, removed_states)

    def _pack_cleanup(self, conn, cursor, sleep=None):
        """Remove unneeded table rows after packing"""
        # commit the work done so far
        conn.commit()
        self.locker.release_commit_lock(cursor)
        log.info("pack: cleaning up")

        # This section does not need to hold the commit lock, as it only
        # touches pack-specific tables.  We already hold a pack lock for
        # that.
        log.debug("pack: removing unused object references")
        stmt = self._script_pack_object_ref
        self.runner.run_script(cursor, stmt)

        # We need a commit lock when touching the transaction table though.
        # We'll do it in batches of 1000 rows.
        log.debug("pack: removing empty packed transactions")
        while True:
            self._pause_pack_until_lock(cursor, sleep)
            stmt = self._script_delete_empty_transactions_batch
            self.runner.run_script_stmt(cursor, stmt)
            deleted = cursor.rowcount
            conn.commit()
            self.locker.release_commit_lock(cursor)
            if deleted < 1000:
                # Last set of deletions complete
                break

        # perform cleanup that does not require the commit lock
        log.debug("pack: clearing temporary pack state")
        for _table in ('pack_object', 'pack_state', 'pack_state_tid'):
            stmt = '%(TRUNCATE)s ' + _table
            self.runner.run_script_stmt(cursor, stmt)
class PostgreSQLAdapter(object):
    """PostgreSQL adapter for RelStorage."""
    implements(IRelStorageAdapter)

    def __init__(self, dsn='', options=None):
        # options is a relstorage.options.Options or None
        self._dsn = dsn
        if options is None:
            options = Options()
        self.options = options
        self.keep_history = options.keep_history

        self.version_detector = PostgreSQLVersionDetector()
        self.connmanager = Psycopg2ConnectionManager(
            dsn=dsn,
            options=options,
        )
        self.runner = ScriptRunner()
        self.locker = PostgreSQLLocker(
            options=options,
            lock_exceptions=(psycopg2.DatabaseError,),
            version_detector=self.version_detector,
        )
        self.schema = PostgreSQLSchemaInstaller(
            connmanager=self.connmanager,
            runner=self.runner,
            locker=self.locker,
            keep_history=self.keep_history,
        )
        self.mover = ObjectMover(
            database_type='postgresql',
            options=options,
            runner=self.runner,
            version_detector=self.version_detector,
        )
        self.connmanager.set_on_store_opened(self.mover.on_store_opened)
        self.oidallocator = PostgreSQLOIDAllocator()
        self.txncontrol = PostgreSQLTransactionControl(
            keep_history=self.keep_history,
        )

        self.poller = Poller(
            poll_query="EXECUTE get_latest_tid",
            keep_history=self.keep_history,
            runner=self.runner,
            revert_when_stale=options.revert_when_stale,
        )

        if self.keep_history:
            self.packundo = HistoryPreservingPackUndo(
                database_type='postgresql',
                connmanager=self.connmanager,
                runner=self.runner,
                locker=self.locker,
                options=options,
            )
            self.dbiter = HistoryPreservingDatabaseIterator(
                database_type='postgresql',
                runner=self.runner,
            )
        else:
            self.packundo = HistoryFreePackUndo(
                database_type='postgresql',
                connmanager=self.connmanager,
                runner=self.runner,
                locker=self.locker,
                options=options,
            )
            self.dbiter = HistoryFreeDatabaseIterator(
                database_type='postgresql',
                runner=self.runner,
            )

        self.stats = PostgreSQLStats(
            connmanager=self.connmanager,
        )

    def new_instance(self):
        return PostgreSQLAdapter(dsn=self._dsn, options=self.options)

    def __str__(self):
        parts = [self.__class__.__name__]
        if self.keep_history:
            parts.append('history preserving')
        else:
            parts.append('history free')
        dsnparts = self._dsn.split()
        s = ' '.join(p for p in dsnparts if not p.startswith('password'))
        parts.append('dsn=%r' % s)
        return ", ".join(parts)
class Poller(object):
    """Database change notification poller"""
    implements(IPoller)

    def __init__(self, poll_query, keep_history, runner,
            revert_when_stale):
        self.poll_query = poll_query
        self.keep_history = keep_history
        self.runner = runner
        self.revert_when_stale = revert_when_stale

    def poll_invalidations(self, conn, cursor, prev_polled_tid, ignore_tid):
        """Polls for new transactions.

        conn and cursor must have been created previously by
        open_for_load().  prev_polled_tid is the tid returned at the last
        poll, or None if this is the first poll.  If ignore_tid is not
        None, changes committed in that transaction will not be included
        in the list of changed OIDs.

        Returns (changes, new_polled_tid), where changes is either a list
        of (oid, tid) that have changed, or None to indicate that the
        changes are too complex to list.  new_polled_tid can be 0 if
        there is no data in the database.
        """
        # find out the tid of the most recent transaction.
        cursor.execute(self.poll_query)
        rows = list(cursor)
        if not rows:
            # No data.
            return None, 0
        new_polled_tid = rows[0][0]
        if not new_polled_tid:
            # No data.
            return None, 0

        if prev_polled_tid is None:
            # This is the first time the connection has polled.
            return None, new_polled_tid

        if new_polled_tid == prev_polled_tid:
            # No transactions have been committed since prev_polled_tid.
            return (), new_polled_tid

        elif new_polled_tid > prev_polled_tid:
            # New transaction(s) have been added.

            if self.keep_history:
                # If the previously polled transaction no longer exists,
                # the cache is too old and needs to be cleared.
                # XXX Do we actually need to detect this condition?  I
                # think if we delete this block of code, all the
                # unreachable objects will be garbage collected anyway.
                # So, as a test, there is no equivalent of this block of
                # code for history-free storage.  If something goes wrong,
                # then we'll know there's some other edge condition we
                # have to account for.
                stmt = "SELECT 1 FROM transaction WHERE tid = %(tid)s"
                cursor.execute(
                    intern(stmt % self.runner.script_vars),
                    {'tid': prev_polled_tid})
                rows = cursor.fetchall()
                if not rows:
                    # Transaction not found; perhaps it has been packed.
                    # The connection cache should be cleared.
                    return None, new_polled_tid

            # Get the list of changed OIDs and return it.
            if self.keep_history:
                stmt = """
                SELECT zoid, tid
                FROM current_object
                WHERE tid > %(tid)s
                """
            else:
                stmt = """
                SELECT zoid, tid
                FROM object_state
                WHERE tid > %(tid)s
                """
            params = {'tid': prev_polled_tid}
            if ignore_tid is not None:
                stmt += " AND tid != %(self_tid)s"
                params['self_tid'] = ignore_tid
            stmt = intern(stmt % self.runner.script_vars)

            cursor.execute(stmt, params)
            changes = cursor.fetchall()

            return changes, new_polled_tid

        else:
            # The database connection is stale.  This can happen after
            # reading an asynchronous slave that is not fully up to date.
            # (It may also suggest that transaction IDs are not being
            # created in order, which would be a serious bug leading to
            # consistency violations.)
            if self.revert_when_stale:
                # This client prefers to revert to the old state.
                log.warning(
                    "Reverting to stale transaction ID %d and clearing "
                    "cache.  (prev_polled_tid=%d)",
                    new_polled_tid, prev_polled_tid)
                # We have to invalidate the whole cPickleCache, otherwise
                # the cache would be inconsistent with the reverted state.
                return None, new_polled_tid
            else:
                # This client never wants to revert to stale data, so
                # raise ReadConflictError to trigger a retry.
                # We're probably just waiting for async replication
                # to catch up, so retrying could do the trick.
                raise ReadConflictError(
                    "The database connection is stale: new_polled_tid=%d, "
                    "prev_polled_tid=%d." % (
                        new_polled_tid, prev_polled_tid))

    def list_changes(self, cursor, after_tid, last_tid):
        """Return the (oid, tid) values changed in a range of transactions.

        The returned iterable must include the latest changes in the range
        after_tid < tid <= last_tid.
        """
        if self.keep_history:
            stmt = """
            SELECT zoid, tid
            FROM current_object
            WHERE tid > %(min_tid)s
                AND tid <= %(max_tid)s
            """
        else:
            stmt = """
            SELECT zoid, tid
            FROM object_state
            WHERE tid > %(min_tid)s
                AND tid <= %(max_tid)s
            """
        params = {'min_tid': after_tid, 'max_tid': last_tid}
        stmt = intern(stmt % self.runner.script_vars)
        cursor.execute(stmt, params)
        return cursor.fetchall()
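# Aside (hypothetical helper, not part of the module): how a caller is
# expected to interpret the poll_invalidations() contract documented above.
# The cache object and its methods are assumptions for illustration.
def _apply_poll_result(cache, changes, new_polled_tid):
    if changes is None:
        # First poll, packed-away transaction, or stale revert:
        # drop the whole cache.
        cache.clear()
    else:
        # May be an empty sequence (nothing committed since last poll).
        for oid, tid in changes:
            cache.invalidate(oid, tid)
    return new_polled_tid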
class ObjectMover(object): implements(IObjectMover) _method_names = ( 'load_current', 'load_revision', 'exists', 'load_before', 'get_object_tid_after', 'current_object_tids', 'on_store_opened', 'make_batcher', 'store_temp', 'restore', 'detect_conflict', 'replace_temp', 'move_from_temp', 'update_current', 'download_blob', 'upload_blob', ) def __init__(self, database_type, options, runner=None, Binary=None, inputsizes=None, version_detector=None): # The inputsizes parameter is for Oracle only. self.database_type = database_type self.keep_history = options.keep_history self.blob_chunk_size = options.blob_chunk_size self.runner = runner self.Binary = Binary self.inputsizes = inputsizes self.version_detector = version_detector for method_name in self._method_names: method = getattr(self, '%s_%s' % (database_type, method_name)) setattr(self, method_name, method) @metricmethod_sampled def postgresql_load_current(self, cursor, oid): """Returns the current pickle and integer tid for an object. oid is an integer. Returns (None, None) if object does not exist. """ if self.keep_history: stmt = """ SELECT encode(state, 'base64'), tid FROM current_object JOIN object_state USING(zoid, tid) WHERE zoid = %s """ else: stmt = """ SELECT encode(state, 'base64'), tid FROM object_state WHERE zoid = %s """ cursor.execute(stmt, (oid, )) if cursor.rowcount: assert cursor.rowcount == 1 state64, tid = cursor.fetchone() if state64 is not None: state = decodestring(state64) else: # This object's creation has been undone state = None return state, tid else: return None, None @metricmethod_sampled def mysql_load_current(self, cursor, oid): """Returns the current pickle and integer tid for an object. oid is an integer. Returns (None, None) if object does not exist. """ if self.keep_history: stmt = """ SELECT state, tid FROM current_object JOIN object_state USING(zoid, tid) WHERE zoid = %s """ else: stmt = """ SELECT state, tid FROM object_state WHERE zoid = %s """ cursor.execute(stmt, (oid, )) if cursor.rowcount: assert cursor.rowcount == 1 return cursor.fetchone() else: return None, None @metricmethod_sampled def oracle_load_current(self, cursor, oid): """Returns the current pickle and integer tid for an object. oid is an integer. Returns (None, None) if object does not exist. """ if self.keep_history: stmt = """ SELECT state, tid FROM current_object JOIN object_state USING(zoid, tid) WHERE zoid = :1 """ else: stmt = """ SELECT state, tid FROM object_state WHERE zoid = :1 """ return self.runner.run_lob_stmt(cursor, stmt, (oid, ), default=(None, None)) @metricmethod_sampled def postgresql_load_revision(self, cursor, oid, tid): """Returns the pickle for an object on a particular transaction. Returns None if no such state exists. """ stmt = """ SELECT encode(state, 'base64') FROM object_state WHERE zoid = %s AND tid = %s """ cursor.execute(stmt, (oid, tid)) if cursor.rowcount: assert cursor.rowcount == 1 (state64, ) = cursor.fetchone() if state64 is not None: return decodestring(state64) return None @metricmethod_sampled def mysql_load_revision(self, cursor, oid, tid): """Returns the pickle for an object on a particular transaction. Returns None if no such state exists. """ stmt = """ SELECT state FROM object_state WHERE zoid = %s AND tid = %s """ cursor.execute(stmt, (oid, tid)) if cursor.rowcount: assert cursor.rowcount == 1 (state, ) = cursor.fetchone() return state return None @metricmethod_sampled def oracle_load_revision(self, cursor, oid, tid): """Returns the pickle for an object on a particular transaction. 
    @metricmethod_sampled
    def oracle_load_revision(self, cursor, oid, tid):
        """Returns the pickle for an object on a particular transaction.

        Returns None if no such state exists.
        """
        stmt = """
        SELECT state
        FROM object_state
        WHERE zoid = :1
            AND tid = :2
        """
        (state,) = self.runner.run_lob_stmt(
            cursor, stmt, (oid, tid), default=(None,))
        return state

    @metricmethod_sampled
    def generic_exists(self, cursor, oid):
        """Returns a true value if the given object exists."""
        if self.keep_history:
            stmt = "SELECT 1 FROM current_object WHERE zoid = %s"
        else:
            stmt = "SELECT 1 FROM object_state WHERE zoid = %s"
        cursor.execute(stmt, (oid,))
        return cursor.rowcount

    postgresql_exists = generic_exists
    mysql_exists = generic_exists

    @metricmethod_sampled
    def oracle_exists(self, cursor, oid):
        """Returns a true value if the given object exists."""
        if self.keep_history:
            stmt = "SELECT 1 FROM current_object WHERE zoid = :1"
        else:
            stmt = "SELECT 1 FROM object_state WHERE zoid = :1"
        cursor.execute(stmt, (oid,))
        for _row in cursor:
            return True
        return False

    @metricmethod_sampled
    def postgresql_load_before(self, cursor, oid, tid):
        """Returns the pickle and tid of an object before transaction tid.

        Returns (None, None) if no earlier state exists.
        """
        stmt = """
        SELECT encode(state, 'base64'), tid
        FROM object_state
        WHERE zoid = %s
            AND tid < %s
        ORDER BY tid DESC
        LIMIT 1
        """
        cursor.execute(stmt, (oid, tid))
        if cursor.rowcount:
            assert cursor.rowcount == 1
            state64, tid = cursor.fetchone()
            if state64 is not None:
                state = decodestring(state64)
            else:
                # The object's creation has been undone
                state = None
            return state, tid
        else:
            return None, None

    @metricmethod_sampled
    def mysql_load_before(self, cursor, oid, tid):
        """Returns the pickle and tid of an object before transaction tid.

        Returns (None, None) if no earlier state exists.
        """
        stmt = """
        SELECT state, tid
        FROM object_state
        WHERE zoid = %s
            AND tid < %s
        ORDER BY tid DESC
        LIMIT 1
        """
        cursor.execute(stmt, (oid, tid))
        if cursor.rowcount:
            assert cursor.rowcount == 1
            return cursor.fetchone()
        else:
            return None, None

    @metricmethod_sampled
    def oracle_load_before(self, cursor, oid, tid):
        """Returns the pickle and tid of an object before transaction tid.

        Returns (None, None) if no earlier state exists.
        """
        stmt = """
        SELECT state, tid
        FROM object_state
        WHERE zoid = :oid
            AND tid = (
                SELECT MAX(tid)
                FROM object_state
                WHERE zoid = :oid
                    AND tid < :tid
            )
        """
        return self.runner.run_lob_stmt(
            cursor, stmt, {'oid': oid, 'tid': tid}, default=(None, None))

    @metricmethod_sampled
    def generic_get_object_tid_after(self, cursor, oid, tid):
        """Returns the tid of the next change after an object revision.

        Returns None if no later state exists.
        """
        stmt = """
        SELECT tid
        FROM object_state
        WHERE zoid = %s
            AND tid > %s
        ORDER BY tid
        LIMIT 1
        """
        cursor.execute(stmt, (oid, tid))
        if cursor.rowcount:
            assert cursor.rowcount == 1
            return cursor.fetchone()[0]
        else:
            return None

    postgresql_get_object_tid_after = generic_get_object_tid_after
    mysql_get_object_tid_after = generic_get_object_tid_after
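
    # Note: load_before() and get_object_tid_after() together provide
    # ZODB's loadBefore interval semantics.  A hypothetical sketch:
    #
    #     state, start_tid = mover.load_before(cursor, oid, tid)
    #     end_tid = mover.get_object_tid_after(cursor, oid, start_tid)
    #
    # state is then valid for start_tid <= t < end_tid, where end_tid
    # is None if the revision is still current.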
""" stmt = """ SELECT MIN(tid) FROM object_state WHERE zoid = :1 AND tid > :2 """ cursor.execute(stmt, (oid, tid)) rows = cursor.fetchall() if rows: assert len(rows) == 1 return rows[0][0] else: return None @metricmethod_sampled def generic_current_object_tids(self, cursor, oids): """Returns the current {oid: tid} for specified object ids.""" res = {} if self.keep_history: table = 'current_object' else: table = 'object_state' oids = list(oids) while oids: oid_list = ','.join(str(oid) for oid in oids[:1000]) del oids[:1000] stmt = "SELECT zoid, tid FROM %s WHERE zoid IN (%s)" % (table, oid_list) cursor.execute(stmt) for oid, tid in fetchmany(cursor): res[oid] = tid return res postgresql_current_object_tids = generic_current_object_tids mysql_current_object_tids = generic_current_object_tids oracle_current_object_tids = generic_current_object_tids @metricmethod_sampled def postgresql_on_store_opened(self, cursor, restart=False): """Create the temporary tables for storing objects""" # note that the md5 column is not used if self.keep_history == False. stmt = """ CREATE TEMPORARY TABLE temp_store ( zoid BIGINT NOT NULL, prev_tid BIGINT NOT NULL, md5 CHAR(32), state BYTEA ) ON COMMIT DROP; CREATE UNIQUE INDEX temp_store_zoid ON temp_store (zoid); CREATE TEMPORARY TABLE temp_blob_chunk ( zoid BIGINT NOT NULL, chunk_num BIGINT NOT NULL, chunk OID ) ON COMMIT DROP; CREATE UNIQUE INDEX temp_blob_chunk_key ON temp_blob_chunk (zoid, chunk_num); -- This trigger removes blobs that get replaced before being -- moved to blob_chunk. Note that it is never called when -- the temp_blob_chunk table is being dropped or truncated. CREATE TRIGGER temp_blob_chunk_delete BEFORE DELETE ON temp_blob_chunk FOR EACH ROW EXECUTE PROCEDURE temp_blob_chunk_delete_trigger(); """ cursor.execute(stmt) @metricmethod_sampled def mysql_on_store_opened(self, cursor, restart=False): """Create the temporary table for storing objects""" if restart: stmt = "DROP TEMPORARY TABLE IF EXISTS temp_store" cursor.execute(stmt) stmt = "DROP TEMPORARY TABLE IF EXISTS temp_blob_chunk" cursor.execute(stmt) # note that the md5 column is not used if self.keep_history == False. 
stmt = """ CREATE TEMPORARY TABLE temp_store ( zoid BIGINT UNSIGNED NOT NULL PRIMARY KEY, prev_tid BIGINT UNSIGNED NOT NULL, md5 CHAR(32), state LONGBLOB ) ENGINE MyISAM """ cursor.execute(stmt) stmt = """ CREATE TEMPORARY TABLE temp_blob_chunk ( zoid BIGINT UNSIGNED NOT NULL, chunk_num BIGINT UNSIGNED NOT NULL, PRIMARY KEY (zoid, chunk_num), chunk LONGBLOB ) ENGINE MyISAM """ cursor.execute(stmt) # no store connection initialization needed for Oracle oracle_on_store_opened = None @metricmethod_sampled def postgresql_make_batcher(self, cursor, row_limit): return PostgreSQLRowBatcher(cursor, self.version_detector, row_limit) @metricmethod_sampled def mysql_make_batcher(self, cursor, row_limit): return MySQLRowBatcher(cursor, row_limit) @metricmethod_sampled def oracle_make_batcher(self, cursor, row_limit): return OracleRowBatcher(cursor, self.inputsizes, row_limit) @metricmethod_sampled def postgresql_store_temp(self, cursor, batcher, oid, prev_tid, data): """Store an object in the temporary table.""" if self.keep_history: md5sum = compute_md5sum(data) else: md5sum = None batcher.delete_from('temp_store', zoid=oid) batcher.insert_into( "temp_store (zoid, prev_tid, md5, state)", "%s, %s, %s, decode(%s, 'base64')", (oid, prev_tid, md5sum, encodestring(data)), rowkey=oid, size=len(data), ) @metricmethod_sampled def mysql_store_temp(self, cursor, batcher, oid, prev_tid, data): """Store an object in the temporary table.""" if self.keep_history: md5sum = compute_md5sum(data) else: md5sum = None batcher.insert_into( "temp_store (zoid, prev_tid, md5, state)", "%s, %s, %s, %s", (oid, prev_tid, md5sum, self.Binary(data)), rowkey=oid, size=len(data), command='REPLACE', ) @metricmethod_sampled def oracle_store_temp(self, cursor, batcher, oid, prev_tid, data): """Store an object in the temporary table.""" if self.keep_history: md5sum = compute_md5sum(data) else: md5sum = None size = len(data) if size <= 2000: # Send data inline for speed. Oracle docs say maximum size # of a RAW is 2000 bytes. stmt = "BEGIN relstorage_op.store_temp(:1, :2, :3, :4); END;" batcher.add_array_op( stmt, 'oid prev_tid md5sum rawdata', (oid, prev_tid, md5sum, data), rowkey=oid, size=size, ) else: # Send data as a BLOB row = { 'oid': oid, 'prev_tid': prev_tid, 'md5sum': md5sum, 'blobdata': data, } batcher.insert_into( "temp_store (zoid, prev_tid, md5, state)", ":oid, :prev_tid, :md5sum, :blobdata", row, rowkey=oid, size=size, ) @metricmethod_sampled def postgresql_restore(self, cursor, batcher, oid, tid, data): """Store an object directly, without conflict detection. Used for copying transactions into this database. """ if self.keep_history: md5sum = compute_md5sum(data) else: md5sum = None if data is not None: encoded = encodestring(data) size = len(data) else: encoded = None size = 0 if self.keep_history: batcher.delete_from("object_state", zoid=oid, tid=tid) row_schema = """ %s, %s, COALESCE((SELECT tid FROM current_object WHERE zoid = %s), 0), %s, %s, decode(%s, 'base64') """ batcher.insert_into( "object_state (zoid, tid, prev_tid, md5, state_size, state)", row_schema, (oid, tid, oid, md5sum, size, encoded), rowkey=(oid, tid), size=size, ) else: batcher.delete_from('object_state', zoid=oid) if data: batcher.insert_into( "object_state (zoid, tid, state_size, state)", "%s, %s, %s, decode(%s, 'base64')", (oid, tid, size, encoded), rowkey=oid, size=size, ) @metricmethod_sampled def mysql_restore(self, cursor, batcher, oid, tid, data): """Store an object directly, without conflict detection. 
    @metricmethod_sampled
    def mysql_restore(self, cursor, batcher, oid, tid, data):
        """Store an object directly, without conflict detection.

        Used for copying transactions into this database.
        """
        if self.keep_history:
            md5sum = compute_md5sum(data)
        else:
            md5sum = None
        if data is not None:
            encoded = self.Binary(data)
            size = len(data)
        else:
            encoded = None
            size = 0
        if self.keep_history:
            row_schema = """
                %s, %s,
                COALESCE((SELECT tid FROM current_object WHERE zoid = %s), 0),
                %s, %s, %s
            """
            batcher.insert_into(
                "object_state (zoid, tid, prev_tid, md5, state_size, state)",
                row_schema,
                (oid, tid, oid, md5sum, size, encoded),
                rowkey=(oid, tid),
                size=size,
                command='REPLACE',
            )
        else:
            if data:
                batcher.insert_into(
                    "object_state (zoid, tid, state_size, state)",
                    "%s, %s, %s, %s",
                    (oid, tid, size, encoded),
                    rowkey=oid,
                    size=size,
                    command='REPLACE',
                )
            else:
                batcher.delete_from('object_state', zoid=oid)

    @metricmethod_sampled
    def oracle_restore(self, cursor, batcher, oid, tid, data):
        """Store an object directly, without conflict detection.

        Used for copying transactions into this database.
        """
        if self.keep_history:
            md5sum = compute_md5sum(data)
        else:
            md5sum = None
        if data is not None:
            size = len(data)
        else:
            size = 0
        if size <= 2000:
            # Send data inline for speed.  Oracle docs say the maximum
            # size of a RAW is 2000 bytes.
            if self.keep_history:
                stmt = "BEGIN relstorage_op.restore(:1, :2, :3, :4); END;"
                batcher.add_array_op(
                    stmt,
                    'oid tid md5sum rawdata',
                    (oid, tid, md5sum, data),
                    rowkey=(oid, tid),
                    size=size,
                )
            else:
                stmt = "BEGIN relstorage_op.restore(:1, :2, :3); END;"
                batcher.add_array_op(
                    stmt,
                    'oid tid rawdata',
                    (oid, tid, data),
                    rowkey=(oid, tid),
                    size=size,
                )
        else:
            # Send as a BLOB
            if self.keep_history:
                row = {
                    'oid': oid,
                    'tid': tid,
                    'md5sum': md5sum,
                    'state_size': size,
                    'blobdata': data,
                }
                row_schema = """
                    :oid, :tid,
                    COALESCE((SELECT tid
                              FROM current_object
                              WHERE zoid = :oid), 0),
                    :md5sum, :state_size, :blobdata
                """
                batcher.insert_into(
                    "object_state (zoid, tid, prev_tid, md5, state_size, state)",
                    row_schema,
                    row,
                    rowkey=(oid, tid),
                    size=size,
                )
            else:
                batcher.delete_from('object_state', zoid=oid)
                if data:
                    row = {
                        'oid': oid,
                        'tid': tid,
                        'state_size': size,
                        'blobdata': data,
                    }
                    batcher.insert_into(
                        "object_state (zoid, tid, state_size, state)",
                        ":oid, :tid, :state_size, :blobdata",
                        row,
                        rowkey=oid,
                        size=size,
                    )

    @metricmethod_sampled
    def postgresql_detect_conflict(self, cursor):
        """Find one conflict in the data about to be committed.

        If there is a conflict, returns
        (oid, prev_tid, attempted_prev_tid, attempted_data).
        If there is no conflict, returns None.
        """
        if self.keep_history:
            stmt = """
            SELECT temp_store.zoid, current_object.tid, temp_store.prev_tid,
                encode(temp_store.state, 'base64')
            FROM temp_store
                JOIN current_object ON (temp_store.zoid = current_object.zoid)
            WHERE temp_store.prev_tid != current_object.tid
            LIMIT 1
            """
        else:
            stmt = """
            SELECT temp_store.zoid, object_state.tid, temp_store.prev_tid,
                encode(temp_store.state, 'base64')
            FROM temp_store
                JOIN object_state ON (temp_store.zoid = object_state.zoid)
            WHERE temp_store.prev_tid != object_state.tid
            LIMIT 1
            """
        cursor.execute(stmt)
        if cursor.rowcount:
            oid, prev_tid, attempted_prev_tid, data = cursor.fetchone()
            return oid, prev_tid, attempted_prev_tid, decodestring(data)
        return None
    @metricmethod_sampled
    def mysql_detect_conflict(self, cursor):
        """Find one conflict in the data about to be committed.

        If there is a conflict, returns
        (oid, prev_tid, attempted_prev_tid, attempted_data).
        If there is no conflict, returns None.
        """
        # Lock in share mode to ensure the data being read is up to date.
        if self.keep_history:
            stmt = """
            SELECT temp_store.zoid, current_object.tid, temp_store.prev_tid,
                temp_store.state
            FROM temp_store
                JOIN current_object ON (temp_store.zoid = current_object.zoid)
            WHERE temp_store.prev_tid != current_object.tid
            LIMIT 1
            LOCK IN SHARE MODE
            """
        else:
            stmt = """
            SELECT temp_store.zoid, object_state.tid, temp_store.prev_tid,
                temp_store.state
            FROM temp_store
                JOIN object_state ON (temp_store.zoid = object_state.zoid)
            WHERE temp_store.prev_tid != object_state.tid
            LIMIT 1
            LOCK IN SHARE MODE
            """
        cursor.execute(stmt)
        if cursor.rowcount:
            return cursor.fetchone()
        return None

    @metricmethod_sampled
    def oracle_detect_conflict(self, cursor):
        """Find one conflict in the data about to be committed.

        If there is a conflict, returns
        (oid, prev_tid, attempted_prev_tid, attempted_data).
        If there is no conflict, returns None.
        """
        if self.keep_history:
            stmt = """
            SELECT temp_store.zoid, current_object.tid, temp_store.prev_tid,
                temp_store.state
            FROM temp_store
                JOIN current_object ON (temp_store.zoid = current_object.zoid)
            WHERE temp_store.prev_tid != current_object.tid
            """
        else:
            stmt = """
            SELECT temp_store.zoid, object_state.tid, temp_store.prev_tid,
                temp_store.state
            FROM temp_store
                JOIN object_state ON (temp_store.zoid = object_state.zoid)
            WHERE temp_store.prev_tid != object_state.tid
            """
        return self.runner.run_lob_stmt(cursor, stmt)

    @metricmethod_sampled
    def postgresql_replace_temp(self, cursor, oid, prev_tid, data):
        """Replace an object in the temporary table.

        This happens after conflict resolution.
        """
        if self.keep_history:
            md5sum = compute_md5sum(data)
        else:
            md5sum = None
        stmt = """
        UPDATE temp_store SET
            prev_tid = %s,
            md5 = %s,
            state = decode(%s, 'base64')
        WHERE zoid = %s
        """
        cursor.execute(stmt, (prev_tid, md5sum, encodestring(data), oid))

    @metricmethod_sampled
    def mysql_replace_temp(self, cursor, oid, prev_tid, data):
        """Replace an object in the temporary table.

        This happens after conflict resolution.
        """
        if self.keep_history:
            md5sum = compute_md5sum(data)
        else:
            md5sum = None
        stmt = """
        UPDATE temp_store SET
            prev_tid = %s,
            md5 = %s,
            state = %s
        WHERE zoid = %s
        """
        cursor.execute(stmt, (prev_tid, md5sum, self.Binary(data), oid))

    @metricmethod_sampled
    def oracle_replace_temp(self, cursor, oid, prev_tid, data):
        """Replace an object in the temporary table.

        This happens after conflict resolution.
        """
        if self.keep_history:
            md5sum = compute_md5sum(data)
        else:
            md5sum = None
        stmt = """
        UPDATE temp_store SET
            prev_tid = :prev_tid,
            md5 = :md5sum,
            state = :blobdata
        WHERE zoid = :oid
        """
        cursor.setinputsizes(blobdata=self.inputsizes['blobdata'])
        cursor.execute(stmt, oid=oid, prev_tid=prev_tid,
            md5sum=md5sum, blobdata=self.Binary(data))
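
    # A hypothetical commit-time resolution loop built on
    # detect_conflict() and replace_temp() (sketch only; the real
    # storage supplies the resolver and the error type):
    #
    #     while True:
    #         conflict = mover.detect_conflict(cursor)
    #         if conflict is None:
    #             break
    #         oid, prev_tid, attempted_prev_tid, data = conflict
    #         resolved = resolve(oid, prev_tid, attempted_prev_tid, data)
    #         if resolved is None:
    #             raise ConflictError(oid=oid)
    #         mover.replace_temp(cursor, oid, prev_tid, resolved)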
""" if self.keep_history: if self.database_type == 'oracle': stmt = """ INSERT INTO object_state (zoid, tid, prev_tid, md5, state_size, state) SELECT zoid, :1, prev_tid, md5, COALESCE(LENGTH(state), 0), state FROM temp_store """ else: stmt = """ INSERT INTO object_state (zoid, tid, prev_tid, md5, state_size, state) SELECT zoid, %s, prev_tid, md5, COALESCE(LENGTH(state), 0), state FROM temp_store """ cursor.execute(stmt, (tid, )) else: if self.database_type == 'mysql': stmt = """ REPLACE INTO object_state (zoid, tid, state_size, state) SELECT zoid, %s, COALESCE(LENGTH(state), 0), state FROM temp_store """ cursor.execute(stmt, (tid, )) else: stmt = """ DELETE FROM object_state WHERE zoid IN (SELECT zoid FROM temp_store) """ cursor.execute(stmt) if self.database_type == 'oracle': stmt = """ INSERT INTO object_state (zoid, tid, state_size, state) SELECT zoid, :1, COALESCE(LENGTH(state), 0), state FROM temp_store """ else: stmt = """ INSERT INTO object_state (zoid, tid, state_size, state) SELECT zoid, %s, COALESCE(LENGTH(state), 0), state FROM temp_store """ cursor.execute(stmt, (tid, )) if txn_has_blobs: stmt = """ DELETE FROM blob_chunk WHERE zoid IN (SELECT zoid FROM temp_store) """ cursor.execute(stmt) if txn_has_blobs: if self.database_type == 'oracle': stmt = """ INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) SELECT zoid, :1, chunk_num, chunk FROM temp_blob_chunk """ else: stmt = """ INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) SELECT zoid, %s, chunk_num, chunk FROM temp_blob_chunk """ cursor.execute(stmt, (tid, )) stmt = """ SELECT zoid FROM temp_store """ cursor.execute(stmt) return [oid for (oid, ) in fetchmany(cursor)] postgresql_move_from_temp = generic_move_from_temp mysql_move_from_temp = generic_move_from_temp oracle_move_from_temp = generic_move_from_temp @metricmethod_sampled def postgresql_update_current(self, cursor, tid): """Update the current object pointers. tid is the integer tid of the transaction being committed. """ if not self.keep_history: # nothing needs to be updated return cursor.execute( """ -- Insert objects created in this transaction into current_object. INSERT INTO current_object (zoid, tid) SELECT zoid, tid FROM object_state WHERE tid = %(tid)s AND prev_tid = 0; -- Change existing objects. To avoid deadlocks, -- update in OID order. UPDATE current_object SET tid = %(tid)s WHERE zoid IN ( SELECT zoid FROM object_state WHERE tid = %(tid)s AND prev_tid != 0 ORDER BY zoid ) """, {'tid': tid}) @metricmethod_sampled def mysql_update_current(self, cursor, tid): """Update the current object pointers. tid is the integer tid of the transaction being committed. """ if not self.keep_history: # nothing needs to be updated return cursor.execute( """ REPLACE INTO current_object (zoid, tid) SELECT zoid, tid FROM object_state WHERE tid = %s """, (tid, )) @metricmethod_sampled def oracle_update_current(self, cursor, tid): """Update the current object pointers. tid is the integer tid of the transaction being committed. """ if not self.keep_history: # nothing needs to be updated return # Insert objects created in this transaction into current_object. stmt = """ INSERT INTO current_object (zoid, tid) SELECT zoid, tid FROM object_state WHERE tid = :1 AND prev_tid = 0 """ cursor.execute(stmt, (tid, )) # Change existing objects. 
stmt = """ UPDATE current_object SET tid = :1 WHERE zoid IN ( SELECT zoid FROM object_state WHERE tid = :1 AND prev_tid != 0 ) """ cursor.execute(stmt, (tid, )) @metricmethod_sampled def postgresql_download_blob(self, cursor, oid, tid, filename): """Download a blob into a file.""" stmt = """ SELECT chunk_num, chunk FROM blob_chunk WHERE zoid = %s AND tid = %s ORDER BY chunk_num """ f = None bytecount = 0 try: cursor.execute(stmt, (oid, tid)) for chunk_num, loid in cursor.fetchall(): blob = cursor.connection.lobject(loid, 'rb') if chunk_num == 0: # Use the native psycopg2 blob export functionality blob.export(filename) blob.close() bytecount = os.path.getsize(filename) continue if f is None: f = open(filename, 'ab') # Append, chunk 0 was an export read_chunk_size = self.blob_chunk_size reader = iter(lambda: blob.read(read_chunk_size), b('')) for read_chunk in reader: f.write(read_chunk) bytecount += len(read_chunk) except: if f is not None: f.close() os.remove(filename) raise if f is not None: f.close() return bytecount @metricmethod_sampled def mysql_download_blob(self, cursor, oid, tid, filename): """Download a blob into a file.""" stmt = """ SELECT chunk FROM blob_chunk WHERE zoid = %s AND tid = %s AND chunk_num = %s """ f = None bytecount = 0 try: chunk_num = 0 while True: cursor.execute(stmt, (oid, tid, chunk_num)) rows = list(cursor) if rows: assert len(rows) == 1 chunk = rows[0][0] else: # No more chunks. Note: if there are no chunks at # all, then this method should not write a file. break if f is None: f = open(filename, 'wb') f.write(chunk) bytecount += len(chunk) chunk_num += 1 except: if f is not None: f.close() os.remove(filename) raise if f is not None: f.close() return bytecount @metricmethod_sampled def oracle_download_blob(self, cursor, oid, tid, filename): """Download a blob into a file.""" stmt = """ SELECT chunk FROM blob_chunk WHERE zoid = :1 AND tid = :2 ORDER BY chunk_num """ f = None bytecount = 0 # Current versions of cx_Oracle only support offsets up # to sys.maxint or 4GB, whichever comes first. maxsize = min(sys.maxint, 1 << 32) try: cursor.execute(stmt, (oid, tid)) while True: try: blob, = cursor.fetchone() except TypeError: # No more chunks. Note: if there are no chunks at # all, then this method should not write a file. break if f is None: f = open(filename, 'wb') # round off the chunk-size to be a multiple of the oracle # blob chunk size to maximize performance read_chunk_size = int( max( round(1.0 * self.blob_chunk_size / blob.getchunksize()), 1) * blob.getchunksize()) offset = 1 # Oracle still uses 1-based indexing. reader = iter(lambda: blob.read(offset, read_chunk_size), b('')) for read_chunk in reader: f.write(read_chunk) bytecount += len(read_chunk) offset += len(read_chunk) if offset > maxsize: # We have already read the maximum we can store # so we can assume we are done. If we do not break # off here, cx_Oracle will throw an overflow # exception anyway. break except: if f is not None: f.close() os.remove(filename) raise if f is not None: f.close() return bytecount @metricmethod_sampled def postgresql_upload_blob(self, cursor, oid, tid, filename): """Upload a blob from a file. If serial is None, upload to the temporary table. 
""" if tid is not None: if self.keep_history: delete_stmt = """ DELETE FROM blob_chunk WHERE zoid = %s AND tid = %s """ cursor.execute(delete_stmt, (oid, tid)) else: delete_stmt = "DELETE FROM blob_chunk WHERE zoid = %s" cursor.execute(delete_stmt, (oid, )) use_tid = True insert_stmt = """ INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) VALUES (%(oid)s, %(tid)s, %(chunk_num)s, %(loid)s) """ else: use_tid = False delete_stmt = "DELETE FROM temp_blob_chunk WHERE zoid = %s" cursor.execute(delete_stmt, (oid, )) insert_stmt = """ INSERT INTO temp_blob_chunk (zoid, chunk_num, chunk) VALUES (%(oid)s, %(chunk_num)s, %(loid)s) """ blob = None # PostgreSQL only supports up to 2GB of data per BLOB. maxsize = 1 << 31 filesize = os.path.getsize(filename) if filesize <= maxsize: # File is small enough to fit in one chunk, just use # psycopg2 native file copy support blob = cursor.connection.lobject(0, 'wb', 0, filename) blob.close() params = dict(oid=oid, chunk_num=0, loid=blob.oid) if use_tid: params['tid'] = tid cursor.execute(insert_stmt, params) return # We need to divide this up into multiple chunks f = open(filename, 'rb') try: chunk_num = 0 while True: blob = cursor.connection.lobject(0, 'wb') params = dict(oid=oid, chunk_num=chunk_num, loid=blob.oid) if use_tid: params['tid'] = tid cursor.execute(insert_stmt, params) write_chunk_size = self.blob_chunk_size for _i in xrange(int(maxsize / write_chunk_size)): write_chunk = f.read(write_chunk_size) if not blob.write(write_chunk): # EOF. return if not blob.closed: blob.close() chunk_num += 1 finally: f.close() if blob is not None and not blob.closed: blob.close() @metricmethod_sampled def mysql_upload_blob(self, cursor, oid, tid, filename): """Upload a blob from a file. If serial is None, upload to the temporary table. """ if tid is not None: if self.keep_history: delete_stmt = """ DELETE FROM blob_chunk WHERE zoid = %s AND tid = %s """ cursor.execute(delete_stmt, (oid, tid)) else: delete_stmt = "DELETE FROM blob_chunk WHERE zoid = %s" cursor.execute(delete_stmt, (oid, )) use_tid = True insert_stmt = """ INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) VALUES (%s, %s, %s, %s) """ else: use_tid = False delete_stmt = "DELETE FROM temp_blob_chunk WHERE zoid = %s" cursor.execute(delete_stmt, (oid, )) insert_stmt = """ INSERT INTO temp_blob_chunk (zoid, chunk_num, chunk) VALUES (%s, %s, %s) """ f = open(filename, 'rb') try: chunk_num = 0 while True: chunk = f.read(self.blob_chunk_size) if not chunk and chunk_num > 0: # EOF. Note that we always write at least one # chunk, even if the blob file is empty. break if use_tid: params = (oid, tid, chunk_num, chunk) else: params = (oid, chunk_num, chunk) cursor.execute(insert_stmt, params) chunk_num += 1 finally: f.close() @metricmethod_sampled def oracle_upload_blob(self, cursor, oid, tid, filename): """Upload a blob from a file. If serial is None, upload to the temporary table. 
""" if tid is not None: if self.keep_history: delete_stmt = """ DELETE FROM blob_chunk WHERE zoid = :1 AND tid = :2 """ cursor.execute(delete_stmt, (oid, tid)) else: delete_stmt = "DELETE FROM blob_chunk WHERE zoid = :1" cursor.execute(delete_stmt, (oid, )) use_tid = True insert_stmt = """ INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) VALUES (:oid, :tid, :chunk_num, empty_blob()) """ select_stmt = """ SELECT chunk FROM blob_chunk WHERE zoid=:oid AND tid=:tid AND chunk_num=:chunk_num """ else: use_tid = False delete_stmt = "DELETE FROM temp_blob_chunk WHERE zoid = :1" cursor.execute(delete_stmt, (oid, )) insert_stmt = """ INSERT INTO temp_blob_chunk (zoid, chunk_num, chunk) VALUES (:oid, :chunk_num, empty_blob()) """ select_stmt = """ SELECT chunk FROM temp_blob_chunk WHERE zoid=:oid AND chunk_num=:chunk_num """ f = open(filename, 'rb') # Current versions of cx_Oracle only support offsets up # to sys.maxint or 4GB, whichever comes first. We divide up our # upload into chunks within this limit. maxsize = min(sys.maxint, 1 << 32) try: chunk_num = 0 while True: blob = None params = dict(oid=oid, chunk_num=chunk_num) if use_tid: params['tid'] = tid cursor.execute(insert_stmt, params) cursor.execute(select_stmt, params) blob, = cursor.fetchone() blob.open() write_chunk_size = int( max( round(1.0 * self.blob_chunk_size / blob.getchunksize()), 1) * blob.getchunksize()) offset = 1 # Oracle still uses 1-based indexing. for _i in xrange(int(maxsize / write_chunk_size)): write_chunk = f.read(write_chunk_size) if not blob.write(write_chunk, offset): # EOF. return offset += len(write_chunk) if blob is not None and blob.isopen(): blob.close() chunk_num += 1 finally: f.close() if blob is not None and blob.isopen(): blob.close()
class HistoryPreservingDatabaseIterator(DatabaseIterator):
    implements(IDatabaseIterator)

    def _transaction_iterator(self, cursor):
        """Iterate over a list of transactions returned from the database.

        Each row begins with (tid, username, description, extension)
        and may have other columns.
        """
        use_base64 = self.use_base64
        for row in cursor:
            tid, username, description, ext = row[:4]
            if username is None:
                username = b('')
            else:
                username = decode_bytes_param(username, use_base64)
            if description is None:
                description = b('')
            else:
                description = decode_bytes_param(description, use_base64)
            if ext is None:
                ext = b('')
            else:
                ext = decode_bytes_param(ext, use_base64)
            yield (tid, username, description, ext) + tuple(row[4:])

    def iter_transactions(self, cursor):
        """Iterate over the transaction log, newest first.

        Skips packed transactions.
        Yields (tid, username, description, extension) for each transaction.
        """
        if self.use_base64:
            stmt = """
            SELECT tid, encode(username, 'base64'),
                encode(description, 'base64'),
                encode(extension, 'base64')
            FROM transaction
            WHERE packed = %(FALSE)s
                AND tid != 0
            ORDER BY tid DESC
            """
        else:
            stmt = """
            SELECT tid, username, description, extension
            FROM transaction
            WHERE packed = %(FALSE)s
                AND tid != 0
            ORDER BY tid DESC
            """
        self.runner.run_script_stmt(cursor, stmt)
        return self._transaction_iterator(cursor)

    def iter_transactions_range(self, cursor, start=None, stop=None):
        """Iterate over the transactions in the given range, oldest first.

        Includes packed transactions.
        Yields (tid, username, description, extension, packed)
        for each transaction.
        """
        if self.use_base64:
            stmt = """
            SELECT tid, encode(username, 'base64'),
                encode(description, 'base64'),
                encode(extension, 'base64'),
                CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END
            FROM transaction
            WHERE tid >= 0
            """
        else:
            stmt = """
            SELECT tid, username, description, extension,
                CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END
            FROM transaction
            WHERE tid >= 0
            """
        if start is not None:
            stmt += " AND tid >= %(min_tid)s"
        if stop is not None:
            stmt += " AND tid <= %(max_tid)s"
        stmt += " ORDER BY tid"
        self.runner.run_script_stmt(cursor, stmt, {
            'min_tid': start,
            'max_tid': stop,
        })
        return self._transaction_iterator(cursor)

    def iter_object_history(self, cursor, oid):
        """Iterate over an object's history.

        Raises KeyError if the object does not exist.
        Yields (tid, username, description, extension, pickle_size)
        for each modification.
        """
        stmt = """
        SELECT 1 FROM current_object WHERE zoid = %(oid)s
        """
        self.runner.run_script_stmt(cursor, stmt, {'oid': oid})
        if not cursor.fetchall():
            raise KeyError(oid)
        if self.use_base64:
            stmt = """
            SELECT tid, encode(username, 'base64'),
                encode(description, 'base64'),
                encode(extension, 'base64'), state_size
            """
        else:
            stmt = """
            SELECT tid, username, description, extension, state_size
            """
        stmt += """
        FROM transaction
            JOIN object_state USING (tid)
        WHERE zoid = %(oid)s
            AND packed = %(FALSE)s
        ORDER BY tid DESC
        """
        self.runner.run_script_stmt(cursor, stmt, {'oid': oid})
        return self._transaction_iterator(cursor)
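
# Usage sketch (hypothetical reporting helper): summarize a tid window
# with iter_transactions_range().  The iterator instance and the open
# cursor are assumed to come from the surrounding adapter machinery.
def _example_count_packed(iterator, cursor, start_tid, stop_tid):
    total = packed = 0
    for _tid, _user, _desc, _ext, is_packed in \
            iterator.iter_transactions_range(cursor, start_tid, stop_tid):
        total += 1
        if is_packed:
            packed += 1
    return total, packed
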
class AbstractConnectionManager(object):
    """Abstract base class for connection management.

    Responsible for opening and closing database connections.
    """
    implements(IConnectionManager)

    # disconnected_exceptions contains the exception types that might be
    # raised when the connection to the database has been broken.
    disconnected_exceptions = (ReplicaClosedException,)

    # close_exceptions contains the exception types to ignore
    # when the adapter attempts to close a database connection.
    close_exceptions = ()

    # on_store_opened is either None or a callable that
    # will be called whenever a store cursor is opened or rolled back.
    on_store_opened = None

    def __init__(self, options):
        # options is a relstorage.options.Options instance
        if options.replica_conf:
            self.replica_selector = ReplicaSelector(
                options.replica_conf, options.replica_timeout)
        else:
            self.replica_selector = None

        if options.ro_replica_conf:
            self.ro_replica_selector = ReplicaSelector(
                options.ro_replica_conf, options.replica_timeout)
        else:
            self.ro_replica_selector = self.replica_selector

    def set_on_store_opened(self, f):
        """Set the on_store_opened hook"""
        self.on_store_opened = f

    def open(self):
        """Open a database connection and return (conn, cursor)."""
        raise NotImplementedError()

    @metricmethod
    def close(self, conn, cursor):
        """Close a connection and cursor, ignoring certain errors."""
        for obj in (cursor, conn):
            if obj is not None:
                try:
                    obj.close()
                except self.close_exceptions:
                    pass

    def open_and_call(self, callback):
        """Call a function with an open connection and cursor.

        If the function returns, commits the transaction and returns the
        result returned by the function.
        If the function raises an exception, aborts the transaction
        then propagates the exception.
        """
        conn, cursor = self.open()
        try:
            try:
                res = callback(conn, cursor)
            except:
                conn.rollback()
                raise
            else:
                conn.commit()
                return res
        finally:
            self.close(conn, cursor)

    def open_for_load(self):
        raise NotImplementedError()

    def restart_load(self, conn, cursor):
        """Reinitialize a connection for loading objects."""
        self.check_replica(conn, cursor,
            replica_selector=self.ro_replica_selector)
        conn.rollback()

    def check_replica(self, conn, cursor, replica_selector=None):
        """Raise an exception if the connection belongs to an old replica"""
        if replica_selector is None:
            replica_selector = self.replica_selector

        if replica_selector is not None:
            current = replica_selector.current()
            if conn.replica != current:
                # Prompt the change to a new replica by raising an exception.
                self.close(conn, cursor)
                raise ReplicaClosedException(
                    "Switched replica from %s to %s"
                    % (conn.replica, current))

    def open_for_store(self):
        """Open and initialize a connection for storing objects.

        Returns (conn, cursor).
        """
        conn, cursor = self.open()
        try:
            if self.on_store_opened is not None:
                self.on_store_opened(cursor, restart=False)
            return conn, cursor
        except:
            self.close(conn, cursor)
            raise

    def restart_store(self, conn, cursor):
        """Reuse a store connection."""
        self.check_replica(conn, cursor)
        conn.rollback()
        if self.on_store_opened is not None:
            self.on_store_opened(cursor, restart=True)

    def open_for_pre_pack(self):
        """Open a connection to be used for the pre-pack phase.

        Returns (conn, cursor).
        """
        return self.open()
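
# Usage sketch for open_and_call(): run a one-off statement with
# automatic commit/rollback and connection cleanup.  The concrete
# `manager` subclass and the query are illustrative assumptions.
def _example_count_objects(manager):
    def count(conn, cursor):
        cursor.execute("SELECT COUNT(*) FROM object_state")
        return cursor.fetchone()[0]
    return manager.open_and_call(count)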