def update_schema(engine):
    """
    Instead of versioning our schema, this function ensures we are running against the latest
    version of the database schema.

    Each migration step below is guarded by an existence check, so the function is
    idempotent: re-running it against an already-updated database is a no-op.

    :param engine: SQLAlchemy engine connected to the datacube PostgreSQL database.
    :raises ValueError: if the database predates "unification" (no ``dataset_type``
        table) — such databases are too old to be migrated here.
    """
    # Post-unification schemas are identified by the presence of the dataset_type table.
    is_unification = pg_exists(engine, schema_qualified('dataset_type'))
    if not is_unification:
        raise ValueError('Pre-unification database cannot be updated.')

    # Removal of surrogate key from dataset_source: it makes the table larger for no benefit.
    # The old unique constraint only exists if this update has not yet been applied.
    if pg_exists(engine, schema_qualified('uq_dataset_source_dataset_ref')):
        _LOG.info('Applying surrogate-key update')
        # (dataset_ref, classifier) becomes the natural primary key; the old
        # surrogate id column and its constraints are dropped.
        engine.execute("""
        begin;
        alter table {schema}.dataset_source drop constraint pk_dataset_source;
        alter table {schema}.dataset_source drop constraint uq_dataset_source_dataset_ref;
        alter table {schema}.dataset_source add constraint pk_dataset_source primary key(dataset_ref, classifier);
        alter table {schema}.dataset_source drop column id;
        commit;
        """.format(schema=SCHEMA_NAME))
        _LOG.info('Completed surrogate-key update')

    # float8range is needed if the user uses the double-range field type.
    # TYPES_INIT_SQL (defined elsewhere in this module) creates it.
    if not engine.execute(
            "SELECT 1 FROM pg_type WHERE typname = 'float8range'").scalar():
        engine.execute(TYPES_INIT_SQL)

    # Add an 'archived' timestamp column to dataset_location if it is missing.
    if not pg_column_exists(engine, schema_qualified('dataset_location'), 'archived'):
        _LOG.info('Applying dataset_location.archived update')
        engine.execute("""
        alter table {schema}.dataset_location add column archived TIMESTAMP WITH TIME ZONE
        """.format(schema=SCHEMA_NAME))
        _LOG.info('Completed dataset_location.archived update')

    # Update uri indexes to allow dataset search-by-uri.
    # Guarded by the presence of the new dataset_ref index created below.
    if not pg_exists(
            engine,
            schema_qualified('ix_{schema}_dataset_location_dataset_ref'.format(
                schema=SCHEMA_NAME))):
        _LOG.info('Applying uri-search update')
        engine.execute("""
        begin;
        -- Add a separate index by dataset.
        create index ix_{schema}_dataset_location_dataset_ref on {schema}.dataset_location (dataset_ref);
        -- Replace (dataset, uri) index with (uri, dataset) index.
        alter table {schema}.dataset_location add constraint uq_dataset_location_uri_scheme unique (uri_scheme, uri_body, dataset_ref);
        alter table {schema}.dataset_location drop constraint uq_dataset_location_dataset_ref;
        commit;
        """.format(schema=SCHEMA_NAME))
        _LOG.info('Completed uri-search update')
def schema_is_latest(engine):
    """
    Is the schema up-to-date?

    We may have versioned schema in the future; for now we infer that all
    updates have been applied from the presence/absence of certain objects.

    NOTE(review): a second, duplicate definition of ``schema_is_latest`` appears
    later in this file and shadows this one at import time — they should be
    consolidated into a single definition.
    """
    dataset_ref_index = 'ix_{schema}_dataset_location_dataset_ref'.format(
        schema=SCHEMA_NAME)

    # The surrogate-key update removed this unique constraint.
    surrogate_key_removed = not pg_exists(
        engine, schema_qualified('uq_dataset_source_dataset_ref'))
    # The uri-search update created this index.
    uri_search_applied = pg_exists(engine, schema_qualified(dataset_ref_index))
    # The location-archival update added this column.
    archived_column_added = pg_column_exists(
        engine, schema_qualified('dataset_location'), 'archived')

    return surrogate_key_removed and uri_search_applied and archived_column_added
def schema_is_latest(engine):
    """
    Over the lifetime of ODC there have been a couple of schema updates.
    For now we don't version the schema, but we do need to check we're running
    against the latest.

    We may have a versioned schema in the future. For now, we know updates have
    been applied if certain objects exist.

    NOTE(review): this duplicates an earlier ``schema_is_latest`` definition in
    this file (this later one shadows it) — the two should be consolidated.
    """
    # Each flag corresponds to one of the migrations in update_schema().
    checks = (
        # Surrogate-key update: the old unique constraint must be gone.
        not pg_exists(engine, schema_qualified('uq_dataset_source_dataset_ref')),
        # Uri-search update: the per-dataset location index must exist.
        pg_exists(
            engine,
            schema_qualified(
                'ix_{schema}_dataset_location_dataset_ref'.format(schema=SCHEMA_NAME))),
        # Location-archival update: the archived column must exist.
        pg_column_exists(engine, schema_qualified('dataset_location'), 'archived'),
    )
    return all(checks)