class PGVacuum:

    def __init__(self, manager, loop):
        self._manager = manager
        self._loop = loop
        self._queue = asyncio.Queue(loop=loop)
        self._closed = False
        self._active = False
        self._sql = SQLStatements()

    async def initialize(self):
        while not self._closed:
            try:
                await self._initialize()
            except (concurrent.futures.CancelledError, RuntimeError):
                # we're okay with the task getting cancelled
                return

    async def _initialize(self):
        while not self._closed:
            oid = None
            try:
                oid, table_name = await self._queue.get()
                self._active = True
                await shield(self.vacuum(oid, table_name))
            except (concurrent.futures.CancelledError, RuntimeError):
                raise
            except Exception:
                log.warning(f'Error vacuuming oid {oid}', exc_info=True)
            finally:
                self._active = False
                try:
                    self._queue.task_done()
                except ValueError:
                    pass

    async def run(self, table_name):
        '''
        get existing trashed objects and push them on the queue...
        there might be contention, but that is okay
        '''
        async with self._manager.pool.acquire(
                timeout=self._manager._conn_acquire_timeout) as conn:
            try:
                sql = self._sql.get('GET_TRASHED_OBJECTS', table_name)
                for record in await conn.fetch(sql):
                    self._queue.put_nowait((record['zoid'], table_name))
            except concurrent.futures.TimeoutError:
                log.info('Timed out connecting to storage')
            except Exception:
                log.warning('Error deleting trashed object', exc_info=True)

    async def add_to_queue(self, oid, table_name):
        if self._closed:
            raise Exception('Closing down')
        await self._queue.put((oid, table_name))

    async def vacuum(self, oid, table_name):
        '''
        DELETED objects have their parent id changed to the trashed ob for the oid...
        '''
        async with self._manager.pool.acquire(
                timeout=self._manager._conn_acquire_timeout) as conn:
            sql = self._sql.get('DELETE_OBJECT', table_name)
            try:
                await conn.execute(sql, oid)
            except Exception:
                log.warning('Error deleting trashed object', exc_info=True)

    async def finalize(self):
        self._closed = True
        try:
            await asyncio.wait_for(self._queue.join(), 2)
        except asyncio.TimeoutError:
            pass

    @property
    def size(self):
        return self._queue.qsize()
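# Hedged illustration, not part of the storage API: deletes are deferred. The storage's
# delete() only re-parents a row to the trash placeholder and, when autovacuum is enabled,
# registers an after-commit hook that queues the OID on the PGVacuum instance. The helper
# below sketches how a maintenance task might drain that queue manually; it assumes the
# connection manager exposes its PGVacuum as `vacuum` (that attribute name is an assumption).
async def _drain_trashed_objects(storage):
    vacuum = storage.connection_manager.vacuum
    # re-queue rows that were already re-parented to the trash placeholder...
    await vacuum.run(storage.objects_table_name)
    # ...then wait until the queue has been worked off
    while vacuum.size > 0:
        await asyncio.sleep(0.1)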
class PostgresqlStorage(BaseStorage):
    """Storage to a relational database, based on invalidation polling"""

    _dsn = None
    _partition_class = None
    _large_record_size = 1 << 24
    _vacuum_class = PGVacuum
    _connection_manager_class = PGConnectionManager

    _objects_table_name = "objects"
    _blobs_table_name = "blobs"

    _object_schema = {
        "zoid": f"VARCHAR({MAX_UID_LENGTH}) NOT NULL PRIMARY KEY",
        "tid": "BIGINT NOT NULL",
        "state_size": "BIGINT NOT NULL",
        "part": "BIGINT NOT NULL",
        "resource": "BOOLEAN NOT NULL",
        "of": f"VARCHAR({MAX_UID_LENGTH}) REFERENCES {{objects_table_name}} ON DELETE CASCADE",
        "otid": "BIGINT",
        "parent_id": f"VARCHAR({MAX_UID_LENGTH}) REFERENCES {{objects_table_name}} ON DELETE CASCADE",  # noqa
        "id": "TEXT",
        "type": "TEXT NOT NULL",
        "json": "JSONB",
        "state": "BYTEA",
    }

    _blob_schema = {
        "bid": f"VARCHAR({MAX_UID_LENGTH}) NOT NULL",
        "zoid": f"VARCHAR({MAX_UID_LENGTH}) NOT NULL REFERENCES {{objects_table_name}} ON DELETE CASCADE",
        "chunk_index": "INT NOT NULL",
        "data": "BYTEA",
    }

    _initialize_statements = [
        "CREATE INDEX IF NOT EXISTS {object_table_name}_tid ON {objects_table_name} (tid);",
        "CREATE INDEX IF NOT EXISTS {object_table_name}_of ON {objects_table_name} (of);",
        "CREATE INDEX IF NOT EXISTS {object_table_name}_part ON {objects_table_name} (part);",
        "CREATE INDEX IF NOT EXISTS {object_table_name}_parent ON {objects_table_name} (parent_id);",
        "CREATE INDEX IF NOT EXISTS {object_table_name}_id ON {objects_table_name} (id);",
        "CREATE INDEX IF NOT EXISTS {object_table_name}_type ON {objects_table_name} (type);",
        "CREATE INDEX IF NOT EXISTS {blob_table_name}_bid ON {blobs_table_name} (bid);",
        "CREATE INDEX IF NOT EXISTS {blob_table_name}_zoid ON {blobs_table_name} (zoid);",
        "CREATE INDEX IF NOT EXISTS {blob_table_name}_chunk ON {blobs_table_name} (chunk_index);",
        "ALTER TABLE {objects_table_name} ADD CONSTRAINT {object_table_name}_parent_id_zoid_check CHECK (parent_id != zoid) NOT VALID;",  # noqa
    ]

    _unique_constraints = [
        """CREATE UNIQUE INDEX CONCURRENTLY {constraint_name}_parent_id_id_key
           ON {objects_table_name} (parent_id, id)
           WHERE parent_id != '{TRASHED_ID}'""",
        """CREATE UNIQUE INDEX CONCURRENTLY {constraint_name}_annotations_unique
           ON {objects_table_name} (of, id);""",
    ]

    def __init__(
        self,
        dsn=None,
        partition=None,
        read_only=False,
        name=None,
        pool_size=13,
        transaction_strategy="resolve_readcommitted",
        conn_acquire_timeout=20,
        db_schema="public",
        store_json=True,
        objects_table_name="objects",
        blobs_table_name="blobs",
        connection_manager=None,
        autovacuum=True,
        **options,
    ):
        super(PostgresqlStorage, self).__init__(read_only, transaction_strategy=transaction_strategy)
        self._dsn = dsn
        self._pool_size = pool_size
        self._partition_class = partition
        self._read_only = read_only
        self.__name__ = name
        self._conn_acquire_timeout = conn_acquire_timeout
        self._options = options
        self._store_json = store_json
        self._connection_options = {}
        self._connection_initialized_on = time.time()
        self._db_schema = db_schema
        self._objects_table_name = f"{db_schema}.{objects_table_name}"
        self._blobs_table_name = f"{db_schema}.{blobs_table_name}"
        self._sql = SQLStatements()
        self._connection_manager = connection_manager
        self._autovacuum = autovacuum

    async def finalize(self):
        await self._connection_manager.close()

    @property
    def sql(self):
        return self._sql

    @property
    def read_conn(self):
        return self._connection_manager.read_conn

    @property
    def pool(self):
        return self._connection_manager.pool

    @property
    def connection_manager(self):
        return self._connection_manager

    @property
    def lock(self):
        return self._connection_manager.lock

    @property
    def stmt_next_tid(self):
        return self._connection_manager._stmt_next_tid

    @property
    def stmt_max_tid(self):
        return self._connection_manager._stmt_max_tid

    @property
    def objects_table_name(self):
        return self._objects_table_name

    async def create(self, conn=None):
        if conn is None:
            conn = self.read_conn
        # Check DB
        log.info("Creating initial database objects")
        statements = []
        if self._db_schema and self._db_schema != "public":
            statements.extend([f"CREATE SCHEMA IF NOT EXISTS {self._db_schema}"])
        statements.extend(
            [
                get_table_definition(self._objects_table_name, self._object_schema),
                get_table_definition(
                    self._blobs_table_name, self._blob_schema, primary_keys=("bid", "zoid", "chunk_index")
                ),
            ]
        )
        statements.extend(self._initialize_statements)

        for statement in statements:
            otable_name = clear_table_name(self._objects_table_name)
            if otable_name == "objects":
                otable_name = "object"
            btable_name = clear_table_name(self._blobs_table_name)
            if btable_name == "blobs":
                btable_name = "blob"
            statement = statement.format(
                objects_table_name=self._objects_table_name,
                blobs_table_name=self._blobs_table_name,
                # singular, index names
                object_table_name=otable_name,
                blob_table_name=btable_name,
                schema=self._db_schema,
            )
            try:
                await conn.execute(statement)
            except asyncpg.exceptions.UniqueViolationError:
                # this is okay on creation, means 2 getting created at same time
                pass

    async def restart_connection(self, timeout=0.1):
        log.error("Connection potentially lost to pg, restarting")
        await self._connection_manager.restart()
        self._connection_initialized_on = time.time()
        raise ConflictError("Restarting connection to postgresql")

    async def has_unique_constraint(self, conn):
        table_name = clear_table_name(self._objects_table_name)
        result = await conn.fetch(
            """
SELECT * FROM pg_indexes
WHERE tablename = '{}' AND indexname = '{}_parent_id_id_key';
""".format(
                table_name, table_name
            )
        )
        return len(result) > 0

    async def initialize(self, loop=None, **kw):
        self._connection_options = kw
        if self._connection_manager is None:
            self._connection_manager = self._connection_manager_class(
                dsn=self._dsn,
                pool_size=self._pool_size,
                connection_options=self._connection_options,
                conn_acquire_timeout=self._conn_acquire_timeout,
                vacuum_class=self._vacuum_class,
                autovacuum=self._autovacuum,
                db_schema=self._db_schema,
            )
            await self._connection_manager.initialize(loop, **kw)

        async with self.pool.acquire(timeout=self._conn_acquire_timeout) as conn:
            if await self.has_unique_constraint(conn):
                self._supports_unique_constraints = True

            trash_sql = self._sql.get("CREATE_TRASH", self._objects_table_name)
            try:
                await conn.execute(trash_sql)
            except asyncpg.exceptions.ReadOnlySQLTransactionError:
                # Not necessary for read-only pg
                pass
            except (asyncpg.exceptions.UndefinedTableError, asyncpg.exceptions.InvalidSchemaNameError):
                async with conn.transaction():
                    await self.create(conn)
                # only available on new databases
                for constraint in self._unique_constraints:
                    await conn.execute(
                        constraint.format(
                            objects_table_name=self._objects_table_name,
                            constraint_name=clear_table_name(self._objects_table_name),
                            TRASHED_ID=TRASHED_ID,
                        ).replace("CONCURRENTLY", "")
                    )
                self._supports_unique_constraints = True
                await conn.execute(trash_sql)

            await notify(StorageCreatedEvent(self, db_conn=conn))

        self._connection_initialized_on = time.time()

    async def remove(self):
        """Reset the tables"""
        async with self.pool.acquire(timeout=self._conn_acquire_timeout) as conn:
            await conn.execute("DROP TABLE IF EXISTS {} CASCADE;".format(self._blobs_table_name))
            await conn.execute("DROP TABLE IF EXISTS {} CASCADE;".format(self._objects_table_name))

    @restart_conn_on_exception
    async def open(self):
        return await self.pool.acquire(timeout=self._conn_acquire_timeout)

    async def close(self, con):
        try:
            await shield(self.pool.release(con, timeout=1))
        except (asyncio.CancelledError, asyncio.TimeoutError, asyncpg.exceptions.ConnectionDoesNotExistError):
            log.warning("Exception on connection close", exc_info=True)

    async def terminate(self, conn):
        log.warning(f"Terminate connection {conn}", exc_info=True)
        conn.terminate()

    async def load(self, txn, oid):
        sql = self._sql.get("GET_OID", self._objects_table_name)
        async with txn._lock:
            objects = await self.get_one_row(txn, sql, oid)
        if objects is None:
            raise KeyError(oid)
        return objects

    @profilable
    async def store(self, oid, old_serial, writer, obj, txn):
        assert oid is not None
        pickled = writer.serialize()  # This calls __getstate__ of obj
        if len(pickled) >= self._large_record_size:
            log.info(f"Large object {obj.__class__}: {len(pickled)}")
        if self._store_json:
            json_dict = await writer.get_json()
            json = orjson.dumps(json_dict).decode("utf-8")
        else:
            json = None
        part = writer.part
        if part is None:
            part = 0

        update = False
        statement_sql = self._sql.get("NAIVE_UPSERT", self._objects_table_name)
        if not obj.__new_marker__ and obj.__serial__ is not None:
            # we should be confident this is an object update
            statement_sql = self._sql.get("UPDATE", self._objects_table_name)
            update = True

        conn = await txn.get_connection()
        async with txn._lock:
            try:
                result = await conn.fetch(
                    statement_sql,
                    oid,  # The OID of the object
                    txn._tid,  # Our TID
                    len(pickled),  # Len of the object
                    part,  # Partition indicator
                    writer.resource,  # Is a resource ?
                    writer.of,  # It belongs to a main
                    old_serial,  # Old serial
                    writer.parent_id,  # Parent OID
                    writer.id,  # Traversal ID
                    writer.type,  # Guillotina type
                    json,  # JSON catalog
                    pickled,  # Pickle state
                )
            except asyncpg.exceptions.UniqueViolationError as ex:
                if "Key (parent_id, id)" in ex.detail or "Key (of, id)" in ex.detail:
                    raise ConflictIdOnContainer(ex)
                raise
            except asyncpg.exceptions.ForeignKeyViolationError:
                txn.deleted[obj.__uuid__] = obj
                raise TIDConflictError(
                    f"Bad value inserting into database that could be caused "
                    f"by a bad cache value. This should resolve on request retry.",
                    oid,
                    txn,
                    old_serial,
                    writer,
                )
            except asyncpg.exceptions._base.InterfaceError as ex:
                if "another operation is in progress" in ex.args[0]:
                    raise ConflictError(
                        f"asyncpg error, another operation in progress.", oid, txn, old_serial, writer
                    )
                raise
            except asyncpg.exceptions.DeadlockDetectedError:
                raise ConflictError(f"Deadlock detected.", oid, txn, old_serial, writer)
            if len(result) != 1 or result[0]["count"] != 1:
                if update:
                    # raise tid conflict error
                    raise TIDConflictError(
                        f"Mismatch of tid of object being updated. This is likely "
                        f"caused by a cache invalidation race condition and should "
                        f"be an edge case. This should resolve on request retry.",
                        oid,
                        txn,
                        old_serial,
                        writer,
                    )
                else:
                    log.error(
                        "Incorrect response count from database update. "
                        "This should not happen. tid: {}".format(txn._tid)
                    )
        await txn._cache.store_object(obj, pickled)

    async def _txn_oid_commit_hook(self, status, oid):
        if self._connection_manager._vacuum is not None:
            await self._connection_manager._vacuum.add_to_queue(oid, self._objects_table_name)

    async def delete(self, txn, oid):
        conn = await txn.get_connection()
        sql = self._sql.get("TRASH_PARENT_ID", self._objects_table_name)
        async with txn._lock:
            # for delete, we reassign the parent id and delete in the vacuum task
            await conn.execute(sql, oid)
        if self._autovacuum:
            txn.add_after_commit_hook(self._txn_oid_commit_hook, oid)

    async def _check_bad_connection(self, ex):
        # we do not use the transaction lock here but a storage lock because
        # a storage object has a shared conn for reads
        for err in ("connection is closed", "pool is closed", "connection was closed"):
            if err in str(ex):
                if (time.time() - self._connection_initialized_on) > BAD_CONNECTION_RESTART_DELAY:
                    # we need to make sure we aren't calling this over and over again
                    async with self.lock:
                        return await self.restart_connection()

    @restart_conn_on_exception
    async def get_next_tid(self, txn):
        async with self.lock:
            return await self.stmt_next_tid.fetchval()

    @restart_conn_on_exception
    async def get_current_tid(self, txn):
        async with self.lock:
            return await self.stmt_max_tid.fetchval()

    async def get_one_row(self, txn, sql, *args, prepare=False):
        conn = await txn.get_connection()
        # Helper function to provide easy adaptation to cockroach
        if prepare:
            # latest version of asyncpg has prepare bypassing statement cache
            smt = await conn.prepare(sql)
            return await smt.fetchrow(*args)
        else:
            return await conn.fetchrow(sql, *args)

    def _db_transaction_factory(self, txn):
        # make sure asyncpg knows this is a new transaction
        if txn._db_conn._con is not None:
            txn._db_conn._con._top_xact = None
        return txn._db_conn.transaction(readonly=txn._manager._storage._read_only)

    @restart_conn_on_exception
    async def _async_db_transaction_factory(self, txn):
        return self._db_transaction_factory(txn)

    async def start_transaction(self, txn, retries=0):
        error = None
        conn = await txn.get_connection()
        async with txn._lock:
            txn._db_txn = await self._async_db_transaction_factory(txn)
            try:
                await txn._db_txn.start()
                return
            except (asyncpg.exceptions.InterfaceError, asyncpg.exceptions.InternalServerError) as ex:
                error = ex

        if error is not None:
            if retries > 2:
                raise error  # pylint: disable=E0702

            restart = rollback = False
            if isinstance(error, asyncpg.exceptions.InternalServerError):
                restart = True
                if error.sqlstate == "XX000":
                    rollback = True
            elif "manually started transaction" in error.args[0] or "connection is closed" in error.args[0]:
                restart = True
                if "manually started transaction" in error.args[0]:
                    rollback = True

            if rollback:
                try:
                    # thinks we're manually in txn, manually rollback and try again...
                    await conn.execute("ROLLBACK;")
                except asyncpg.exceptions._base.InterfaceError:
                    # we're okay with this error here...
                    pass

            if restart:
                await self.close(conn)
                txn._db_conn = await self.open()
                return await self.start_transaction(txn, retries + 1)

    async def get_conflicts(self, txn):
        async with self.lock:
            if len(txn.modified) == 0:
                return []
            # use storage lock instead of transaction lock
            if len(txn.modified) < 1000:
                # if it's too large, we're not going to check on object ids
                modified_oids = [k for k in txn.modified.keys()]
                sql = self._sql.get("TXN_CONFLICTS_ON_OIDS", self._objects_table_name)
                return await self.read_conn.fetch(sql, txn._tid, modified_oids)
            else:
                sql = self._sql.get("TXN_CONFLICTS", self._objects_table_name)
                return await self.read_conn.fetch(sql, txn._tid)

    async def commit(self, transaction):
        if transaction._db_txn is not None:
            async with transaction._lock:
                await transaction._db_txn.commit()
        elif self._transaction_strategy not in ("none", "tidonly") and not transaction._skip_commit:
            log.warning("Do not have db transaction to commit")
        return transaction._tid

    async def abort(self, transaction):
        if transaction._db_txn is not None:
            async with transaction._lock:
                try:
                    await transaction._db_txn.rollback()
                except asyncpg.exceptions._base.InterfaceError:
                    # we're okay with this error here...
                    pass
        # reads don't need a transaction necessarily so don't log
        # else:
        #     log.warning('Do not have db transaction to rollback')

    # Introspection

    async def get_page_of_keys(self, txn, oid, page=1, page_size=1000):
        conn = await txn.get_connection()
        keys = []
        sql = self._sql.get("BATCHED_GET_CHILDREN_KEYS", self._objects_table_name)
        for record in await conn.fetch(sql, oid, page_size, (page - 1) * page_size):
            keys.append(record["id"])
        return keys

    async def keys(self, txn, oid):
        conn = await txn.get_connection()
        sql = self._sql.get("GET_CHILDREN_KEYS", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetch(sql, oid)
        return result

    async def get_child(self, txn, parent_oid, id):
        sql = self._sql.get("GET_CHILD", self._objects_table_name)
        async with txn._lock:
            result = await self.get_one_row(txn, sql, parent_oid, id)
        return result

    async def get_children(self, txn, parent_oid, ids):
        conn = await txn.get_connection()
        sql = self._sql.get("GET_CHILDREN_BATCH", self._objects_table_name)
        async with txn._lock:
            return await conn.fetch(sql, parent_oid, ids)

    async def has_key(self, txn, parent_oid, id):
        sql = self._sql.get("EXIST_CHILD", self._objects_table_name)
        async with txn._lock:
            result = await self.get_one_row(txn, sql, parent_oid, id)
        if result is None:
            return False
        else:
            return True

    async def len(self, txn, oid):
        conn = await txn.get_connection()
        sql = self._sql.get("NUM_CHILDREN", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetchval(sql, oid)
        return result

    async def items(self, txn, oid):
        conn = await txn.get_connection()
        sql = self._sql.get("GET_CHILDREN", self._objects_table_name)
        async for record in conn.cursor(sql, oid):
            # locks are dangerous in cursors since consuming code might do
            # sub-queries and then you end up with a deadlock
            yield record

    async def get_annotation(self, txn, oid, id):
        sql = self._sql.get("GET_ANNOTATION", self._objects_table_name)
        async with txn._lock:
            result = await self.get_one_row(txn, sql, oid, id, prepare=True)
        if result is not None and result["parent_id"] == TRASHED_ID:
            result = None
        return result

    async def get_annotation_keys(self, txn, oid):
        conn = await txn.get_connection()
        sql = self._sql.get("GET_ANNOTATIONS_KEYS", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetch(sql, oid)
        items = []
        for item in result:
            if item["parent_id"] != TRASHED_ID:
                items.append(item)
        return items

    async def write_blob_chunk(self, txn, bid, oid, chunk_index, data):
        sql = self._sql.get("HAS_OBJECT", self._objects_table_name)
        async with txn._lock:
            result = await self.get_one_row(txn, sql, oid)
        if result is None:
            # check if we have a referenced ob, could be new and not in db yet.
            # if so, create a stub for it here...
            conn = await txn.get_connection()
            async with txn._lock:
                await conn.execute(
                    f"""INSERT INTO {self._objects_table_name}
                        (zoid, tid, state_size, part, resource, type)
                    VALUES ($1::varchar({MAX_UID_LENGTH}), -1, 0, 0, TRUE, 'stub')""",
                    oid,
                )
        conn = await txn.get_connection()
        sql = self._sql.get("INSERT_BLOB_CHUNK", self._blobs_table_name)
        async with txn._lock:
            return await conn.execute(sql, bid, oid, chunk_index, data)

    async def read_blob_chunk(self, txn, bid, chunk=0):
        sql = self._sql.get("READ_BLOB_CHUNK", self._blobs_table_name)
        async with txn._lock:
            return await self.get_one_row(txn, sql, bid, chunk)

    async def read_blob_chunks(self, txn, bid):
        conn = await txn.get_connection()
        async for record in conn.cursor(bid):
            # locks are dangerous in cursors since consuming code might do
            # sub-queries and then you end up with a deadlock
            yield record

    async def del_blob(self, txn, bid):
        conn = await txn.get_connection()
        sql = self._sql.get("DELETE_BLOB", self._blobs_table_name)
        async with txn._lock:
            await conn.execute(sql, bid)

    async def get_total_number_of_objects(self, txn):
        conn = await txn.get_connection()
        sql = self._sql.get("NUM_ROWS", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetchval(sql)
        return result

    async def get_total_number_of_resources(self, txn):
        conn = await txn.get_connection()
        sql = self._sql.get("NUM_RESOURCES", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetchval(sql)
        return result

    async def get_total_resources_of_type(self, txn, type_):
        conn = await txn.get_connection()
        sql = self._sql.get("NUM_RESOURCES_BY_TYPE", self._objects_table_name)
        async with txn._lock:
            result = await conn.fetchval(sql, type_)
        return result

    # Massive treatment without security
    async def _get_page_resources_of_type(self, txn, type_, page, page_size):
        conn = await txn.get_connection()
        async with txn._lock:
            keys = []
            sql = self._sql.get("RESOURCES_BY_TYPE", self._objects_table_name)
            for record in await conn.fetch(sql, type_, page_size, (page - 1) * page_size):
                keys.append(record)
        return keys
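# Minimal usage sketch, not a definitive recipe: in practice the storage is created and
# driven by Guillotina's database/transaction machinery rather than called directly.
# The DSN and database name below are placeholders.
async def _example_bootstrap():
    storage = PostgresqlStorage(
        dsn="postgresql://postgres:postgres@localhost:5432/guillotina",
        name="db",
        pool_size=13,
        autovacuum=True,
    )
    # sets up the connection pool/read connection and, on an empty database, creates the tables
    await storage.initialize()
    try:
        ...  # reads and writes normally go through a transaction manager, not the storage directly
    finally:
        # closes the connection manager (pool, read connection, vacuum task)
        await storage.finalize()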