def get_mapped_crses(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)

            # SQLAlchemy queries require "column == None", not "column is None" due to operator overloading:
            # pylint: disable=singleton-comparison
            res = (
                alchemy_engine(index)
                .execute(
                    select(
                        [
                            literal(product.name).label("product"),
                            get_dataset_srid_alchemy_expression(
                                product.metadata_type
                            ).label("crs"),
                        ]
                    )
                    .where(DATASET.c.dataset_type_ref == product.id)
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
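# A minimal usage sketch (an assumption, not from the source above): print the CRS
# mapped for each product's first non-archived dataset. Assumes a default-configured
# ODC index is reachable; "ls8_nbar_albers" is only an example product name.
if __name__ == "__main__":
    for row in get_mapped_crses("ls8_nbar_albers"):
        print(f"{row['product']}: {row['crs']}")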
def test_allows_null_product_fixed_fields(
    all_urls,
    client: FlaskClient,
    module_index: Index,
    summary_store: SummaryStore,
):
    """
    Pages should not fall over when fixed_metadata is null.

    Older versions of cubedash-gen don't write the fixed_metadata column,
    so it can be null in legacy and migrated deployments.

    (and null is desired behaviour here: null indicates "not known",
    while "empty dict" indicates there are zero fields of metadata)
    """
    # WHEN we have some products summarised
    assert (
        summary_store.list_complete_products()
    ), "There's no summarised products to test"

    # AND there's some with null fixed_metadata (ie. pre-Explorer-EO3-update)
    update_count = (
        _utils.alchemy_engine(module_index)
        .execute(f"update {_schema.PRODUCT.fullname} set fixed_metadata = null")
        .rowcount
    )
    assert update_count > 0, "There were no test products to update?"

    # THEN all pages should still render fine.
    assert_all_urls_render(all_urls, client)
def __init__(self, index: Index, summariser: Summariser, log=_LOG) -> None:
    self.index = index
    self.log = log
    self._update_listeners = []

    self._engine: Engine = _utils.alchemy_engine(index)
    self._summariser = summariser
def summary_store(module_dea_index: Index) -> SummaryStore:
    store = SummaryStore.create(module_dea_index)
    store.drop_all()
    module_dea_index.close()
    store.init()
    _make_all_tables_unlogged(
        _utils.alchemy_engine(module_dea_index), CUBEDASH_METADATA
    )
    return store
def _change_dataset_product(index: Index, dataset_id: UUID, other_product: DatasetType):
    rows_changed = (
        _utils.alchemy_engine(index)
        .execute(
            f"update {_utils.ODC_DATASET.fullname} set dataset_type_ref=%s where id=%s",
            other_product.id,
            dataset_id,
        )
        .rowcount
    )
    assert rows_changed == 1
def create(cls, index: Index, init_schema=False, log=_LOG) -> 'SummaryStore':
    return cls(
        index,
        Summariser(alchemy_engine(index)),
        init_schema=init_schema,
        log=log,
    )
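# A minimal usage sketch (an assumption, not from the source above): build a
# SummaryStore against an existing ODC index, creating the Explorer schema on
# first run. `my_index` is a hypothetical, already-initialised datacube Index.
store = SummaryStore.create(my_index, init_schema=True)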
def get_sample_dataset(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)
            res = (
                alchemy_engine(index)
                .execute(_select_dataset_extent_query(product).limit(1))
                .fetchone()
            )
            if res:
                yield dict(res)
def get_sample_dataset(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)
            res = (
                alchemy_engine(index)
                .execute(
                    select(_select_dataset_extent_columns(product))
                    .where(
                        DATASET.c.dataset_type_ref
                        == bindparam("product_ref", product.id, type_=SmallInteger)
                    )
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
def test_force_dataset_regeneration(
    run_generate, summary_store: SummaryStore, module_index: Index
):
    """
    We should be able to force-replace dataset extents with the
    "--recreate-dataset-extents" option
    """
    run_generate("ls8_nbar_albers")
    [example_dataset] = summary_store.index.datasets.search_eager(
        product="ls8_nbar_albers", limit=1
    )

    original_footprint = summary_store.get_dataset_footprint_region(example_dataset.id)
    assert original_footprint is not None

    # Now let's break the footprint!
    alchemy_engine(module_index).execute(
        f"update {CUBEDASH_SCHEMA}.dataset_spatial "
        " set footprint="
        "     ST_SetSRID("
        "         ST_GeomFromText("
        "             'POLYGON((-71.1776585052917 42.3902909739571,-71.1776820268866 42.3903701743239,"
        "             -71.1776063012595 42.3903825660754,-71.1775826583081 42.3903033653531,"
        "             -71.1776585052917 42.3902909739571))'"
        "         ),"
        "         4326"
        "     )"
        " where id=%s",
        example_dataset.id,
    )
    # Make sure it worked
    footprint = summary_store.get_dataset_footprint_region(example_dataset.id)
    assert footprint != original_footprint, "Test data didn't successfully override"

    # Now force-recreate dataset extents
    run_generate("-v", "ls8_nbar_albers", "--recreate-dataset-extents")

    # ... and they should be correct again
    footprint = summary_store.get_dataset_footprint_region(example_dataset.id)
    assert footprint == original_footprint, "Dataset extent was not regenerated"
def refresh_product(index: Index, product: DatasetType):
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)

    # If we inserted data...
    if insert_count:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                _LOG.debug(
                    "spatial_synthesizing.start",
                    product_name=product.name,
                )

                shapes = _get_path_row_shapes()
                rows = [
                    row
                    for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name
                    )
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update()
                        .where(DATASET_SPATIAL.c.id == bindparam("dataset_id"))
                        .values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union(
                                        [
                                            shapes[(int(sat_path.lower), row)]
                                            for row in range(
                                                int(sat_row.lower),
                                                int(sat_row.upper) + 1,
                                            )
                                        ]
                                    ),
                                    srid=4326,
                                    extended=True,
                                ),
                            )
                            for id_, sat_path, sat_row in rows
                        ],
                    )

                _LOG.debug(
                    "spatial_synthesizing.done",
                    product_name=product.name,
                )

    return insert_count
def __init__(
    self, index: Index, summariser: Summariser, init_schema=False, log=_LOG
) -> None:
    self.index = index
    self.log = log
    self._update_listeners = []

    self._engine: Engine = alchemy_engine(index)
    self._summariser = summariser

    if init_schema:
        _schema.create_schema(self._engine)
def test_cubedash_gen_refresh(module_index):
    """
    Test that cubedash-gen with refresh does not increment the product sequence
    """
    runner = CliRunner()
    res = runner.invoke(cli, ["--init"])
    assert res
    engine = alchemy_engine(module_index)
    last_val = engine.execute(
        "select last_value from cubedash.product_id_seq;"
    ).fetchone()[0]
    assert last_val == 74

    res = runner.invoke(
        cli, ["--no-init-database", "--refresh-stats", "--force-refresh", "--all"]
    )
    assert res
    new_last_val = engine.execute(
        "select last_value from cubedash.product_id_seq;"
    ).fetchone()[0]
    assert new_last_val == 74
def refresh_product(index: Index, product: DatasetType):
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)
    return insert_count
def refresh_spatial_extents(
    index: Index,
    product: DatasetType,
    clean_up_deleted=False,
    assume_after_date: datetime = None,
):
    """
    Update the spatial extents to match any changes upstream in ODC.

    :param assume_after_date:
        Only scan datasets that have changed after the given (db server) time.
        If None, all datasets will be regenerated.
    :param clean_up_deleted: Scan for any manually deleted rows too. Slow.
    """
    engine: Engine = alchemy_engine(index)

    log = _LOG.bind(product_name=product.name, after_date=assume_after_date)

    # First, remove any archived datasets from our spatial table.
    datasets_to_delete = (
        select([DATASET.c.id])
        .where(DATASET.c.archived.isnot(None))
        .where(DATASET.c.dataset_type_ref == product.id)
    )
    if assume_after_date is not None:
        # Note that we use "dataset_changed_expression" to scan the datasets,
        # rather than "where archived > date", because the latter has no index!
        # (.... and we're using dataset_changed_expression's index everywhere else,
        # so it's probably still in memory and super fast!)
        datasets_to_delete = datasets_to_delete.where(
            dataset_changed_expression() > assume_after_date
        )
    log.info("spatial_archival")
    changed = engine.execute(
        DATASET_SPATIAL.delete().where(DATASET_SPATIAL.c.id.in_(datasets_to_delete))
    ).rowcount
    log.info(
        "spatial_archival.end",
        change_count=changed,
    )

    # Forcing? Check every other dataset for removal, so we catch manually-deleted rows from the table.
    if clean_up_deleted:
        log.warning("spatial_deletion_full_scan")
        changed += engine.execute(
            DATASET_SPATIAL.delete()
            .where(
                DATASET_SPATIAL.c.dataset_type_ref == product.id,
            )
            # Where it doesn't exist in the ODC dataset table.
            .where(
                ~DATASET_SPATIAL.c.id.in_(
                    select([DATASET.c.id]).where(
                        DATASET.c.dataset_type_ref == product.id,
                    )
                )
            )
        ).rowcount
        log.info(
            "spatial_deletion_scan.end",
            change_count=changed,
        )

    # We'll update first, then insert new records.
    # -> We do it in this order so that inserted records aren't immediately updated.
    # (Note: why don't we do this in one upsert? Because we get our sqlalchemy expressions
    #  through ODC's APIs and can't choose alternative table aliases to make sub-queries.
    #  Maybe you can figure out a workaround, though?)
    column_values = {c.name: c for c in _select_dataset_extent_columns(product)}
    only_where = [
        DATASET.c.dataset_type_ref
        == bindparam("product_ref", product.id, type_=SmallInteger),
        DATASET.c.archived.is_(None),
    ]
    if assume_after_date is not None:
        only_where.append(dataset_changed_expression() > assume_after_date)
    else:
        log.warning("spatial_update.recreating_everything")

    # Update any changed datasets
    log.info(
        "spatial_update",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        DATASET_SPATIAL.update()
        .values(**column_values)
        .where(DATASET_SPATIAL.c.id == column_values["id"])
        .where(and_(*only_where))
    ).rowcount
    log.info("spatial_update.end", product_name=product.name, change_count=changed)

    # ... and insert new ones.
    log.info(
        "spatial_insert",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            column_values.keys(),
            select(column_values.values())
            .where(and_(*only_where))
            .order_by(column_values["center_time"]),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    ).rowcount
    log.info("spatial_insert.end", product_name=product.name, change_count=changed)

    # If we changed data...
    if changed:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                log.info("spatial_synthesizing")

                shapes = _get_path_row_shapes()
                rows = [
                    row
                    for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name
                    )
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update()
                        .where(DATASET_SPATIAL.c.id == bindparam("dataset_id"))
                        .values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union(
                                        [
                                            shapes[(int(sat_path.lower), row)]
                                            for row in range(
                                                int(sat_row.lower),
                                                int(sat_row.upper) + 1,
                                            )
                                        ]
                                    ),
                                    srid=4326,
                                    extended=True,
                                ),
                            )
                            for id_, sat_path, sat_row in rows
                        ],
                    )
                log.info("spatial_synthesizing.end")

    return changed
def _get_product_seq_value():
    [new_val] = (
        alchemy_engine(module_index)
        .execute(f"select last_value from {CUBEDASH_SCHEMA}.product_id_seq;")
        .fetchone()
    )
    return new_val
def create(cls, index: Index, log=_LOG) -> "SummaryStore":
    return cls(index, Summariser(_utils.alchemy_engine(index)), log=log)