def configure_engine(settings, test_setup=False):
    from contentbase.json_renderer import json_renderer
    engine_url = settings.get('sqlalchemy.url')
    if not engine_url:
        # Already setup by test fixture
        return None
    engine_opts = {}
    if engine_url.startswith('postgresql'):
        if settings.get('indexer_worker'):
            application_name = 'indexer_worker'
        elif settings.get('indexer'):
            application_name = 'indexer'
        else:
            application_name = 'app'
        engine_opts = dict(
            isolation_level='REPEATABLE READ',
            json_serializer=json_renderer.dumps,
            connect_args={'application_name': application_name},
        )
    engine = engine_from_config(settings, 'sqlalchemy.', **engine_opts)
    if engine.url.drivername == 'postgresql':
        timeout = settings.get('postgresql.statement_timeout')
        if timeout:
            timeout = int(timeout) * 1000
            set_postgresql_statement_timeout(engine, timeout)
    if test_setup:
        return engine
    if asbool(settings.get('create_tables', False)):
        Base.metadata.create_all(engine)
    DBSession.configure(bind=engine)
    return engine
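# Illustrative only (not part of the repo): a minimal sketch of how
# configure_engine() might be called from an application factory. The DSN and
# settings values below are hypothetical.
def example_configure_engine():
    settings = {
        'sqlalchemy.url': 'postgresql://postgres@localhost:5432/clincoded',  # hypothetical DSN
        'postgresql.statement_timeout': '20',  # seconds; converted to ms above
        'create_tables': 'true',               # parsed with asbool()
        'indexer': '',                         # falsy, so application_name stays 'app'
    }
    engine = configure_engine(settings)
    # DBSession is now bound to the engine; sessions created later will use it.
    return engine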
def batch_upgrade(request):
    request.datastore = 'database'
    transaction.get().setExtendedInfo('upgrade', True)
    batch = request.json['batch']
    root = request.root
    session = DBSession()
    results = []
    for uuid in batch:
        item_type = None
        update = False
        error = False
        sp = session.begin_nested()
        try:
            item = find_resource(root, uuid)
            item_type = item.item_type
            update, errors = update_item(item)
        except Exception:
            logger.exception('Error updating: /%s/%s', item_type, uuid)
            sp.rollback()
            error = True
        else:
            if errors:
                errortext = [
                    '%s: %s' % ('/'.join(error.path) or '<root>', error.message)
                    for error in errors]
                logger.error(
                    'Validation failure: /%s/%s\n%s',
                    item_type, uuid, '\n'.join(errortext))
                sp.rollback()
                error = True
            else:
                sp.commit()
        results.append((item_type, uuid, update, error))
    return {'results': results}
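# Illustrative only (not part of the repo): the view above expects a JSON body
# with a "batch" list of item uuids and returns one (item_type, uuid, update,
# error) entry per item. The endpoint URL and uuids below are hypothetical, and
# the sketch assumes the ``requests`` library is installed.
def example_batch_upgrade():
    import json
    import requests

    body = {'batch': [
        '11111111-1111-1111-1111-111111111111',
        '22222222-2222-2222-2222-222222222222',
    ]}
    resp = requests.post(
        'http://localhost:6543/batch_upgrade',  # hypothetical route
        data=json.dumps(body),
        headers={'Content-Type': 'application/json'},
    )
    for item_type, uuid, update, error in resp.json()['results']:
        print(item_type, uuid,
              'updated' if update else 'unchanged',
              'error' if error else 'ok')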
def set_snapshot(xmin, snapshot_id):
    from contentbase.storage import DBSession
    global current_xmin_snapshot_id
    if current_xmin_snapshot_id == (xmin, snapshot_id):
        return
    clear_snapshot()
    current_xmin_snapshot_id = (xmin, snapshot_id)

    while True:
        txn = transaction.begin()
        txn.doom()
        if snapshot_id is not None:
            txn.setExtendedInfo('snapshot_id', snapshot_id)
        session = DBSession()
        connection = session.connection()
        db_xmin = connection.execute(
            "SELECT txid_snapshot_xmin(txid_current_snapshot());").scalar()
        if db_xmin >= xmin:
            break
        transaction.abort()
        log.info('Waiting for xmin %r to reach %r', db_xmin, xmin)
        time.sleep(0.1)

    registry = app.registry
    request = app.request_factory.blank('/_indexing_pool')
    request.registry = registry
    request.datastore = 'database'
    apply_request_extensions(request)
    request.invoke_subrequest = app.invoke_subrequest
    request.root = app.root_factory(request)
    request._stats = {}
    manager.push({'request': request, 'registry': registry})
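# A minimal sketch (an assumption, not the repo's code) of the clear_snapshot()
# helper that set_snapshot() calls above, inferred from how the module-level
# marker and the pushed manager entry are used.
def clear_snapshot(signum=None, frame=None):
    global current_xmin_snapshot_id
    if current_xmin_snapshot_id is None:
        return
    transaction.abort()  # release the doomed, read-only transaction
    manager.pop()        # drop the request/registry pushed by set_snapshot()
    current_xmin_snapshot_id = None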
def set_constraints():
    self.state = 'checking'
    session = DBSession()
    session.flush()
    sp = self.connection.begin_nested()
    try:
        self.connection.execute('SET CONSTRAINTS ALL IMMEDIATE')
    except:
        sp.rollback()
        raise
    else:
        self.connection.execute('SET CONSTRAINTS ALL DEFERRED')
        sp.commit()
    finally:
        self.state = None
def session(transaction):
    """Returns a set-up session.

    Depends on transaction as storage relies on some interaction there.
    """
    from contentbase.storage import DBSession
    return DBSession()
def connection(request, engine_url):
    from clincoded import configure_engine
    from contentbase.storage import Base, DBSession
    from sqlalchemy.orm.scoping import ScopedRegistry

    # ``server`` thread must be in same scope
    if type(DBSession.registry) is not ScopedRegistry:
        DBSession.registry = ScopedRegistry(DBSession.session_factory, lambda: 0)

    engine_settings = {
        'sqlalchemy.url': engine_url,
    }

    engine = configure_engine(engine_settings, test_setup=True)
    connection = engine.connect()
    tx = connection.begin()
    try:
        Base.metadata.create_all(bind=connection)
        session = DBSession(scope=None, bind=connection)
        DBSession.registry.set(session)
        yield connection
    finally:
        tx.rollback()
        connection.close()
        engine.dispose()
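# Illustrative only (not from the repo): a test using the fixtures above. The
# session is bound to the outer connection, so queries run inside the
# transaction that the connection fixture rolls back on teardown.
def test_session_uses_fixture_connection(session, connection):
    assert connection.execute('SELECT 1').scalar() == 1
    assert session.connection().engine is connection.engine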
def app(app_settings):
    from contentbase.storage import DBSession
    DBSession.remove()
    DBSession.configure(bind=None)

    from clincoded import main
    app = main({}, **app_settings)

    yield app

    # Shutdown multiprocessing pool to close db conns.
    app.registry['indexer'].shutdown()

    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
    DBSession.remove()
    DBSession.configure(bind=None)
def no_deps(request, connection):
    from contentbase.storage import DBSession
    from sqlalchemy import event

    session = DBSession()

    @event.listens_for(session, 'after_flush')
    def check_dependencies(session, flush_context):
        assert not flush_context.cycles

    @event.listens_for(connection, "before_execute", retval=True)
    def before_execute(conn, clauseelement, multiparams, params):
        return clauseelement, multiparams, params

    @request.addfinalizer
    def remove():
        event.remove(session, 'after_flush', check_dependencies)
def index(request):
    INDEX = request.registry.settings['contentbase.elasticsearch.index']
    # Setting request.datastore here only works because routed views are not traversed.
    request.datastore = 'database'
    record = request.json.get('record', False)
    dry_run = request.json.get('dry_run', False)
    recovery = request.json.get('recovery', False)
    es = request.registry[ELASTIC_SEARCH]
    indexer = request.registry[INDEXER]

    session = DBSession()
    connection = session.connection()
    # http://www.postgresql.org/docs/9.3/static/functions-info.html#FUNCTIONS-TXID-SNAPSHOT
    if recovery:
        # Not yet possible to export a snapshot on a standby server:
        # http://www.postgresql.org/message-id/CAHGQGwEtJCeHUB6KzaiJ6ndvx6EFsidTGnuLwJ1itwVH0EJTOA@mail.gmail.com
        query = connection.execute(
            "SET TRANSACTION ISOLATION LEVEL READ COMMITTED, READ ONLY;"
            "SELECT txid_snapshot_xmin(txid_current_snapshot()), NULL;"
        )
    else:
        query = connection.execute(
            "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY, DEFERRABLE;"
            "SELECT txid_snapshot_xmin(txid_current_snapshot()), pg_export_snapshot();"
        )
        # DEFERRABLE prevents query cancelling due to conflicts but requires SERIALIZABLE mode
        # which is not available in recovery.
    result, = query.fetchall()
    xmin, snapshot_id = result  # lowest xid that is still in progress

    first_txn = None
    last_xmin = None
    if 'last_xmin' in request.json:
        last_xmin = request.json['last_xmin']
    else:
        try:
            status = es.get(index=INDEX, doc_type='meta', id='indexing')
        except NotFoundError:
            pass
        else:
            last_xmin = status['_source']['xmin']

    result = {
        'xmin': xmin,
        'last_xmin': last_xmin,
    }

    if last_xmin is None:
        result['types'] = types = request.json.get('types', None)
        invalidated = all_uuids(request.root, types)
    else:
        txns = session.query(TransactionRecord).filter(
            TransactionRecord.xid >= last_xmin,
        )

        invalidated = set()
        updated = set()
        renamed = set()
        max_xid = 0
        txn_count = 0
        for txn in txns.all():
            txn_count += 1
            max_xid = max(max_xid, txn.xid)
            if first_txn is None:
                first_txn = txn.timestamp
            else:
                first_txn = min(first_txn, txn.timestamp)
            renamed.update(txn.data.get('renamed', ()))
            updated.update(txn.data.get('updated', ()))

        result['txn_count'] = txn_count
        if txn_count == 0:
            return result

        es.indices.refresh(index=INDEX)
        res = es.search(index=INDEX, size=SEARCH_MAX, body={
            'filter': {
                'or': [
                    {
                        'terms': {
                            'embedded_uuids': updated,
                            '_cache': False,
                        },
                    },
                    {
                        'terms': {
                            'linked_uuids': renamed,
                            '_cache': False,
                        },
                    },
                ],
            },
            '_source': False,
        })
        if res['hits']['total'] > SEARCH_MAX:
            invalidated = all_uuids(request.root)
        else:
            referencing = {hit['_id'] for hit in res['hits']['hits']}
            invalidated = referencing | updated
            result.update(
                max_xid=max_xid,
                renamed=renamed,
                updated=updated,
                referencing=len(referencing),
                invalidated=len(invalidated),
                txn_count=txn_count,
                first_txn_timestamp=first_txn.isoformat(),
            )

    if not dry_run:
        result['indexed'] = indexer.update_objects(request, invalidated, xmin, snapshot_id)

        if record:
            es.index(index=INDEX, doc_type='meta', body=result, id='indexing')

        es.indices.refresh(index=INDEX)

    if first_txn is not None:
        result['lag'] = str(datetime.datetime.now(pytz.utc) - first_txn)

    return result
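# Illustrative only (not part of the repo): triggering the indexing view above
# over HTTP. The server URL and route are hypothetical; the JSON keys (record,
# dry_run, recovery, last_xmin, types) are the ones the view reads. Assumes the
# ``requests`` library is installed.
def example_trigger_indexing():
    import json
    import requests

    body = {'record': True, 'dry_run': False, 'recovery': False}
    resp = requests.post(
        'http://localhost:6543/index',  # hypothetical URL
        data=json.dumps(body),
        headers={'Content-Type': 'application/json'},
    )
    status = resp.json()
    print(status['xmin'], status['last_xmin'], status.get('indexed'))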