def test_product_fixed_fields(run_generate, summary_store: SummaryStore):
    """Product summaries should record the metadata fields fixed across each product."""
    run_generate()

    albers = summary_store.get_product_summary("ls8_nbar_albers")
    scene = summary_store.get_product_summary("ls8_nbar_scene")
    telem = summary_store.get_product_summary("ls8_satellite_telemetry_data")

    expected_scene_fields = {
        "platform": "LANDSAT_8",
        "instrument": "OLI_TIRS",
        "product_type": "nbar",
        "format": "GeoTIFF",
        "gsi": "LGN",
        "orbit": None,
    }
    assert scene.fixed_metadata == expected_scene_fields

    expected_telem_fields = {
        "platform": "LANDSAT_8",
        "instrument": "OLI_TIRS",
        "product_type": "satellite_telemetry_data",
        "format": "MD",
        "gsi": "LGN",
        "orbit": None,
    }
    assert telem.fixed_metadata == expected_telem_fields

    # Ingested products carry little of the original metadata...
    expected_albers_fields = {
        "platform": "LANDSAT_8",
        "instrument": "OLI_TIRS",
        "product_type": "nbar",
        "format": "NetCDF",
        "label": None,
    }
    assert albers.fixed_metadata == expected_albers_fields
def test_generate_incremental_archivals(run_generate, summary_store: SummaryStore):
    """Archiving and restoring a dataset should be reflected by the next generate run."""
    product_name = "ls8_nbar_scene"
    run_generate(product_name)
    index = summary_store.index

    # Starting point: a summarised product with a known dataset count.
    baseline_count = summary_store.get(product_name).dataset_count

    # ... and we archive one dataset ...
    dataset_id = _one_dataset(index, product_name)
    try:
        index.datasets.archive([dataset_id])

        # ... the next generation should catch it and update with one less dataset.
        run_generate(product_name)
        assert (
            summary_store.get("ls8_nbar_scene").dataset_count == baseline_count - 1
        ), "Expected dataset count to decrease after archival"
    finally:
        # Now let's restore the dataset! It should be in the count again.
        # (this change should work because the new 'updated' column will be bumped on restore)
        index.datasets.restore([dataset_id])
        run_generate(product_name)
        assert (
            summary_store.get("ls8_nbar_scene").dataset_count == baseline_count
        ), "A dataset that was restored from archival was not refreshed by Explorer"
def test_uninitialised_overview(unpopulated_client: FlaskClient, summary_store: SummaryStore):
    """An overview page for an unsummarised product should still render."""
    # Populate one product, so they don't get the usage error message ("run cubedash generate")
    summary_store.get_or_update("ls7_nbar_albers")

    # Then load an unpopulated product.
    page = get_html(unpopulated_client, "/ls7_nbar_scene/2017")
    region_count = page.find(".coverage-region-count", first=True)
    assert region_count.text == "0 unique scenes"
def test_uninitialised_search_page(empty_client: FlaskClient, summary_store: SummaryStore):
    """Dataset search should still work for a product with no generated summary."""
    # Populate one product, so they don't get the usage error message ("run cubedash generate")
    summary_store.refresh("ls7_nbar_albers")

    # Then load a completely uninitialised product.
    page = get_html(empty_client, "/datasets/ls7_nbar_scene")
    assert len(page.find(".search-result a")) == 4
def test_uninitialised_search_page(empty_client: FlaskClient, summary_store: SummaryStore):
    """Dataset search should still work for a product with no generated summary."""
    # Populate one product, so they don't get the usage error message ("run cubedash generate")
    albers = summary_store.index.products.get_by_name('ls7_nbar_albers')
    summary_store.refresh_product(albers)
    summary_store.get_or_update('ls7_nbar_albers')

    # Then load a completely uninitialised product.
    page = get_html(empty_client, '/datasets/ls7_nbar_scene')
    assert len(page.find('.search-result a')) == 4
def test_calc_empty(summary_store: SummaryStore):
    """Asking for a summary of a product that doesn't exist should return None."""
    summary_store.refresh_all_products()

    # Should not exist.
    assert summary_store.get("ls8_fake_product", year=2006, month=None, day=None) is None
def test_generate_empty_time(run_generate, summary_store: SummaryStore):
    """Time periods containing no datasets should yield no summary."""
    run_generate("ls8_nbar_albers")

    # No datasets in 2018
    summary_2018 = summary_store.get("ls8_nbar_albers", year=2018)
    assert summary_2018 is None, "There should be no datasets in 2018"

    # Year that does not exist for LS8
    summary_2006 = summary_store.get("ls8_nbar_albers", year=2006, month=None, day=None)
    assert summary_2006 is None
def test_uninitialised_overview(unpopulated_client: FlaskClient, summary_store: SummaryStore):
    """An overview page for an unsummarised product should render with 'unknown' fields."""
    # Populate one product, so they don't get the usage error message ("run cubedash generate")
    summary_store.refresh("ls7_nbar_albers")

    # Then load an unpopulated product.
    page = get_html(unpopulated_client, "/ls7_nbar_scene/2017")

    # The page should load without error, but will display 'unknown' fields
    heading = page.find("h2", first=True)
    assert heading.text == "ls7_nbar_scene: Landsat 7 NBAR 25 metre"
    assert "Unknown number of datasets" in page.text
    assert "No data: not yet generated" in page.text
def test_add_no_periods(summary_store: SummaryStore):
    """
    All the get/update methods should work on products with no datasets.
    """
    product = "ga_ls8c_level1_3"

    # The first refresh creates an (empty) summary.
    result, summary = summary_store.refresh(product)
    assert result == GenerateResult.CREATED
    assert summary.dataset_count == 0
    assert summary_store.get("ga_ls8c_level1_3", 2015, 7, 4).dataset_count == 0

    # A second refresh should find nothing new to do.
    result, summary = summary_store.refresh(product)
    assert result == GenerateResult.NO_CHANGES
    assert summary.dataset_count == 0
    assert summary_store.get("ga_ls8c_level1_3").dataset_count == 0
    assert summary_store.get("ga_ls8c_level1_3", 2015, 7, None) is None
def generate_report(item: Tuple[LocalConfig, str, bool]):
    """
    Refresh and summarise a single product.

    Returns (product_name, updated-summary), or (product_name, None) on failure.
    """
    config, product_name, force_refresh = item
    log = _LOG.bind(product=product_name)
    store = SummaryStore.create(_get_index(config, product_name), log=log)
    try:
        product = store.index.products.get_by_name(product_name)
        if product is None:
            raise ValueError(f"Unknown product: {product_name}")

        # If we're going to force things, we need a time that will always update
        # (a negative age makes everything "old enough" to refresh).
        refresh_time = timedelta(minutes=-1) if force_refresh else timedelta(days=1)

        log.info("generate.product.refresh")
        store.refresh_product(product, refresh_older_than=refresh_time)
        log.info("generate.product.refresh.done")

        log.info("generate.product")
        updated = store.get_or_update(product.name, None, None, None, force_refresh)
        log.info("generate.product.done")
        return product_name, updated
    except Exception:
        # Never propagate: a failed product is reported as None so the
        # caller can continue with other products.
        log.exception("generate.product.error", exc_info=True)
        return product_name, None
    finally:
        store.index.close()
def test_generate_scene_all_time(run_generate, summary_store: SummaryStore):
    """Check the whole-of-time summary values for the scene product."""
    run_generate("ls8_nbar_scene")

    # All time
    all_time = summary_store.get("ls8_nbar_scene", year=None, month=None, day=None)
    expected_crses = {
        "EPSG:28349",
        "EPSG:28350",
        "EPSG:28351",
        "EPSG:28352",
        "EPSG:28353",
        "EPSG:28354",
        "EPSG:28355",
        "EPSG:28356",
        "EPSG:28357",
    }
    _expect_values(
        all_time,
        dataset_count=3036,
        footprint_count=3036,
        time_range=Range(
            begin=datetime(2016, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()),
        timeline_period="month",
        timeline_count=24,
        crses=expected_crses,
        size_bytes=1_805_759_242_975,
    )
def generate_report(
    item: Tuple[LocalConfig, str, bool, bool]
) -> Tuple[str, Optional[TimePeriodOverview]]:
    """
    Refresh and summarise a single product.

    Returns (product_name, updated-summary), or (product_name, None) on failure.
    """
    config, product_name, force_refresh, recreate_dataset_extents = item
    log = _LOG.bind(
        product=product_name, force=force_refresh, extents=recreate_dataset_extents
    )
    store = SummaryStore.create(_get_index(config, product_name), log=log)
    try:
        product = store.index.products.get_by_name(product_name)
        if product is None:
            raise ValueError(f"Unknown product: {product_name}")

        # A negative age makes everything "old enough" to always refresh.
        if force_refresh:
            max_age = timedelta(minutes=-1)
        else:
            max_age = timedelta(days=1)

        log.info("generate.product.refresh")
        store.refresh_product(
            product,
            refresh_older_than=max_age,
            force_dataset_extent_recompute=recreate_dataset_extents,
        )
        log.info("generate.product.refresh.done")

        log.info("generate.product")
        updated = store.get_or_update(product.name, force_refresh=force_refresh)
        log.info("generate.product.done")
        return product_name, updated
    except Exception:
        # Never propagate: a failed product is reported as None so the
        # caller can continue with other products.
        log.exception("generate.product.error", exc_info=True)
        return product_name, None
    finally:
        store.index.close()
def generate_report(item):
    """
    Refresh and summarise a single product.

    Returns (product_name, updated-summary), or (product_name, None) on failure.
    """
    config: LocalConfig
    product_name: str
    config, product_name = item

    log = _LOG.bind(product=product_name)
    store = SummaryStore.create(_get_index(config, product_name), log=log)
    try:
        product = store.index.products.get_by_name(product_name)
        if product is None:
            raise ValueError(f"Unknown product: {product_name}")

        log.info('generate.product.refresh')
        store.refresh_product(product)
        log.info('generate.product.refresh.done')

        log.info('generate.product')
        updated = store.get_or_update(product.name, None, None, None)
        log.info('generate.product.done')
        return product_name, updated
    except Exception:
        # Never propagate: a failed product is reported as None so the
        # caller can continue with other products.
        log.exception('generate.product.error', exc_info=True)
        return product_name, None
    finally:
        store.index.close()
def cli(config: LocalConfig, generate_all_products: bool, jobs: int,
        product_names: List[str], event_log_file: str, refresh_stats: bool,
        force_concurrently: bool, verbose: bool):
    """
    Generate summary files for the given products
    """
    init_logging(open(event_log_file, 'a') if event_log_file else None, verbose=verbose)

    index = _get_index(config, 'setup')
    store = SummaryStore.create(index, init_schema=True)

    if generate_all_products:
        products = sorted(store.all_dataset_types(), key=lambda p: p.name)
    else:
        products = list(_load_products(store.index, product_names))

    completed, failures = run_generation(
        config,
        products,
        workers=jobs,
    )
    if refresh_stats:
        echo("Refreshing statistics...", nl=False)
        store.refresh_stats(concurrently=force_concurrently)
        # Fix: click's `secho` takes the text colour via `fg`; its `color`
        # argument is a boolean toggling colour support, so `color='green'`
        # never actually turned the text green.
        secho("done", fg='green')
        _LOG.info('stats.refresh')
    # Exit status carries the generation failures (non-zero means something failed).
    sys.exit(failures)
def test_generate_scene_year(run_generate, summary_store: SummaryStore):
    """Check the summary values for one year of the scene product."""
    run_generate()

    # One year
    year_summary = summary_store.get("ls8_nbar_scene", year=2017, month=None, day=None)
    expected_crses = {
        "EPSG:28349",
        "EPSG:28350",
        "EPSG:28351",
        "EPSG:28352",
        "EPSG:28353",
        "EPSG:28354",
        "EPSG:28355",
        "EPSG:28356",
    }
    _expect_values(
        year_summary,
        dataset_count=1792,
        footprint_count=1792,
        time_range=Range(
            begin=datetime(2017, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=365,
        crses=expected_crses,
        size_bytes=1_060_669_242_142,
    )
def test_allows_null_product_fixed_fields(
    all_urls,
    client: FlaskClient,
    module_index: Index,
    summary_store: SummaryStore,
):
    """
    Pages should not fall over when fixed_metadata is null.

    Older versions of cubedash-gen don't write the fixed_metadata column, so
    it can be null in legacy and migrated deployments.

    (and null is desired behaviour here: null indicates "not known",
    while "empty dict" indicates there are zero fields of metadata)
    """
    # WHEN we have some products summarised
    assert summary_store.list_complete_products(), "There's no summarised products to test"

    # AND there's some with null fixed_metadata (ie. pre-Explorer0-EO3-update)
    engine = _utils.alchemy_engine(module_index)
    result = engine.execute(
        f"update {_schema.PRODUCT.fullname} set fixed_metadata = null"
    )
    assert result.rowcount > 0, "There were no test products to update?"

    # THEN All pages should still render fine.
    assert_all_urls_render(all_urls, client)
def test_generate_month(run_generate, summary_store: SummaryStore):
    """Check the summary values for a single month of the scene product."""
    run_generate("ls8_nbar_scene")

    # One Month
    month_summary = summary_store.update("ls8_nbar_scene", 2017, 4, None)
    expected_crses = {
        "EPSG:28349",
        "EPSG:28350",
        "EPSG:28351",
        "EPSG:28352",
        "EPSG:28353",
        "EPSG:28354",
        "EPSG:28355",
        "EPSG:28356",
    }
    _expect_values(
        month_summary,
        dataset_count=408,
        footprint_count=408,
        time_range=Range(
            begin=datetime(2017, 4, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2017, 5, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2017, 7, 4, 11, 18, 20, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=30,
        crses=expected_crses,
        size_bytes=245_344_352_585,
    )
def test_generate_scene_all_time(run_generate, summary_store: SummaryStore):
    """Check the whole-of-time summary values for the scene product."""
    run_generate('ls8_nbar_scene')

    # All time
    all_time = summary_store.get('ls8_nbar_scene', year=None, month=None, day=None)
    expected_crses = {
        'EPSG:28349',
        'EPSG:28350',
        'EPSG:28351',
        'EPSG:28352',
        'EPSG:28353',
        'EPSG:28354',
        'EPSG:28355',
        'EPSG:28356',
        'EPSG:28357',
    }
    _expect_values(
        all_time,
        dataset_count=3036,
        footprint_count=3036,
        time_range=Range(
            begin=datetime(2016, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2018, 1, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2018, 1, 10, 3, 11, 56, tzinfo=tzutc()),
        timeline_period='month',
        timeline_count=24,
        crses=expected_crses,
        size_bytes=1805759242975,
    )
def test_generate_day(run_generate, summary_store: SummaryStore):
    """Check the summary values for a single day of the albers product."""
    run_generate("ls8_nbar_albers")

    day_summary = summary_store.get_or_update("ls8_nbar_albers", year=2017, month=5, day=2)
    _expect_values(
        day_summary,
        dataset_count=29,
        footprint_count=29,
        time_range=Range(
            begin=datetime(2017, 5, 2, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2017, 5, 3, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2017, 10, 20, 8, 53, 26, 475_609, tzinfo=tzutc()),
        timeline_period="day",
        timeline_count=1,
        crses={"EPSG:3577"},
        size_bytes=None,
    )
def test_dataset_changing_product(run_generate, summary_store: SummaryStore):
    """
    If a dataset is updated to be in a different product, Explorer should correctly
    update its summaries. (this really happened at NCI previously)

    This is a trickier case than regular updates because everything in Explorer
    is product-specific. Summarising one product at a time, etc.
    """
    run_generate("ls8_nbar_scene")
    index = summary_store.index
    # Pick an arbitrary dataset from the scene product to move around.
    dataset_id = _one_dataset(index, "ls8_nbar_scene")
    our_product = index.products.get_by_name("ls8_nbar_scene")
    other_product = index.products.get_by_name("ls8_nbar_albers")

    # When we have a summarised product...
    original_summary = summary_store.get("ls8_nbar_scene")
    original_dataset_count = original_summary.dataset_count
    try:
        # Move the dataset to another product
        _change_dataset_product(index, dataset_id, other_product)
        assert index.datasets.get(dataset_id).type.name == "ls8_nbar_albers"

        # Explorer should remove it too.
        print(f"Test dataset: {dataset_id}")
        # TODO: Make this work without a force-refresh.
        # It's hard because we're scanning for updated datasets in the product...
        # but it's not in the product. And the incremental updater misses it.
        # So we have to force the non-incremental updater.
        run_generate("ls8_nbar_albers", "ls8_nbar_scene", "--force-refresh")
        assert (
            summary_store.get("ls8_nbar_scene").dataset_count
            == original_dataset_count - 1
        ), "Expected dataset to be removed after product change"
    finally:
        # Now change it back
        # (done in a finally-block so other tests sharing the index aren't affected)
        _change_dataset_product(index, dataset_id, our_product)
        run_generate("ls8_nbar_albers", "ls8_nbar_scene", "--force-refresh")
        assert (
            summary_store.get("ls8_nbar_scene").dataset_count
            == original_dataset_count
        ), "Expected dataset to be added again after the product changed back"
def test_calc_albers_summary_with_storage(summary_store: SummaryStore):
    """A generated albers summary should be stored, and then reused on later gets."""
    # Should not exist yet.
    assert summary_store.get("ls8_nbar_albers", year=None, month=None, day=None) is None
    assert summary_store.get("ls8_nbar_albers", year=2017, month=None, day=None) is None

    # We don't want it to add a few minutes overlap buffer,
    # as we add datasets and refresh immediately.
    summary_store.dataset_overlap_carefulness = timedelta(seconds=0)

    # Calculate overall summary
    _, summary = summary_store.refresh("ls8_nbar_albers")
    _expect_values(
        summary,
        dataset_count=918,
        footprint_count=918,
        time_range=Range(
            begin=datetime(2017, 4, 1, 0, 0, tzinfo=DEFAULT_TZ),
            end=datetime(2017, 6, 1, 0, 0, tzinfo=DEFAULT_TZ),
        ),
        newest_creation_time=datetime(2017, 10, 25, 23, 9, 2, 486_851, tzinfo=tzutc()),
        # Data spans 61 days in 2017
        timeline_period="day",
        timeline_count=61,
        crses={"EPSG:3577"},
        # Ingested tiles don't store their size.
        # TODO: probably should represent this as None instead of zero?
        size_bytes=0,
    )

    first_copy = summary_store.get("ls8_nbar_albers", 2017)

    # It should now return the same copy, not rebuild it.
    summary_store.refresh("ls8_nbar_albers")
    second_copy = summary_store.get("ls8_nbar_albers", 2017)
    assert second_copy is not first_copy
    assert second_copy.dataset_count == first_copy.dataset_count
    assert second_copy.summary_gen_time is not None
    assert (
        second_copy.summary_gen_time == first_copy.summary_gen_time
    ), "A new, rather than cached, summary was returned"
def summary_store(module_dea_index: Index) -> SummaryStore:
    """Pytest fixture: a freshly (re)initialised SummaryStore over the module's index."""
    store = SummaryStore.create(module_dea_index)
    # Start from a clean slate: drop any existing Explorer schema, then recreate it.
    store.drop_all()
    module_dea_index.close()
    store.init()
    # Make the cubedash tables unlogged — presumably to speed up test runs;
    # verify against _make_all_tables_unlogged if this matters.
    _make_all_tables_unlogged(_utils.alchemy_engine(module_dea_index), CUBEDASH_METADATA)
    return store
def test_generate_empty_time(run_generate, summary_store: SummaryStore):
    """A year with no datasets should produce a zero-count summary, not an error."""
    run_generate("ls8_nbar_albers")

    # No datasets in 2018
    year_2018 = summary_store.get_or_update("ls8_nbar_albers", year=2018, month=None, day=None)
    assert year_2018.dataset_count == 0, "There should be no datasets in 2018"
    # assert len(summary.timeline_dataset_counts) == 365, "Empty regions should still show up in timeline histogram"

    # Year that does not exist for LS8
    year_2006 = summary_store.get("ls8_nbar_albers", year=2006, month=None, day=None)
    assert year_2006 is None
def test_has_source_derived_product_links(run_generate, summary_store: SummaryStore):
    """Product summaries should record their source and derived product names."""
    run_generate()

    albers = summary_store.get_product_summary("ls8_nbar_albers")
    scene = summary_store.get_product_summary("ls8_nbar_scene")
    telem = summary_store.get_product_summary("ls8_satellite_telemetry_data")
    print(repr([albers, scene, telem]))

    # Provenance chain: telemetry -> level1 -> scene -> albers
    assert albers.source_products == ["ls8_nbar_scene"]
    assert albers.derived_products == []

    assert scene.source_products == ["ls8_level1_scene"]
    assert scene.derived_products == ["ls8_nbar_albers"]

    assert telem.source_products == []
    assert telem.derived_products == ["ls8_level1_scene"]
def test_add_no_periods(summary_store: SummaryStore):
    """
    All the get/update methods should work on products with no datasets.
    """
    summary_store._set_product_extent(
        ProductSummary("test_empty_product", 0, None, None, [], [])
    )
    # Exercise every time granularity: day, month, year, and all-time.
    for year, month, day in (
        (2015, 7, 4),
        (2015, 7, None),
        (2015, None, None),
        (None, None, None),
    ):
        summary_store.get_or_update("test_empty_product", year, month, day)
def cli(
    config: LocalConfig,
    generate_all_products: bool,
    jobs: int,
    product_names: List[str],
    event_log_file: str,
    refresh_stats: bool,
    force_concurrently: bool,
    verbose: bool,
    init_database: bool,
    drop_database: bool,
    force_refresh: bool,
    recreate_dataset_extents: bool,
):
    """Command-line entry point: generate Explorer summaries for the given products."""
    init_logging(open(event_log_file, "a") if event_log_file else None, verbose=verbose)

    index = _get_index(config, "setup")
    store = SummaryStore.create(index)

    # Drop mode: remove all Explorer additions and exit successfully.
    if drop_database:
        user_message("Dropping all Explorer additions to the database")
        store.drop_all()
        user_message("Done. Goodbye.")
        sys.exit(0)

    # Schema handling: create it on --init, otherwise refuse to run against
    # a missing (-1) or outdated (-2) schema.
    if init_database:
        user_message("Initialising schema")
        store.init()
    elif not store.is_initialised():
        user_message(
            style("No cubedash schema exists. ", fg="red")
            + "Please rerun with --init to create one",
        )
        sys.exit(-1)
    elif not store.is_schema_compatible():
        user_message(
            style("Cubedash schema is out of date. ", fg="red")
            + "Please rerun with --init to apply updates.",
        )
        sys.exit(-2)

    # Choose the set of products to summarise.
    if generate_all_products:
        products = sorted(store.all_dataset_types(), key=lambda p: p.name)
    else:
        products = list(_load_products(store.index, product_names))

    completed, failures = run_generation(
        config,
        products,
        workers=jobs,
        force_refresh=force_refresh,
        recreate_dataset_extents=recreate_dataset_extents,
    )
    if refresh_stats:
        user_message("Refreshing statistics...", nl=False)
        store.refresh_stats(concurrently=force_concurrently)
        user_message("done", color="green")
        _LOG.info("stats.refresh")
    # Exit status carries `failures` from run_generation — non-zero when
    # generation failed for some products (presumably a count; confirm
    # against run_generation's return contract).
    sys.exit(failures)
def test_put_get_summaries(summary_store: SummaryStore):
    """
    Test the serialisation/deserialisation from postgres
    """
    o = _overview()
    assert o.summary_gen_time is None, "Generation time should be set by server"

    # A product record must exist before a time-period summary can be stored.
    product_name = "some_product"
    summary_store._set_product_extent(
        ProductSummary(
            product_name, 4321, datetime(2017, 1, 1), datetime(2017, 4, 1), [], [], {}
        )
    )

    # Store the overview for year 2017 and read it back.
    summary_store._put(product_name, 2017, None, None, o)
    loaded = summary_store.get(product_name, 2017, None, None)

    assert o is not loaded, (
        "Store should not return the original objects " "(they may change)"
    )
    # _put mutates the stored object: the server assigns a generation time.
    assert (
        o.summary_gen_time is not None
    ), "Summary-gen-time should have been added by the server"
    original_gen_time = o.summary_gen_time

    assert o.footprint_geometry.area == pytest.approx(4.857_924_619_872)

    # Round-tripped values should match what was stored.
    assert loaded.dataset_count == 4
    assert (
        sum(loaded.region_dataset_counts.values()) == 4
    ), "Region dataset counts don't match total count"
    assert sorted(loaded.region_dataset_counts.keys()) == [
        "1_2",
        "3_4",
        "4_5",
    ], "Incorrect set of regions"

    assert o.footprint_crs == loaded.footprint_crs
    assert loaded.footprint_crs == "EPSG:3577"
    assert loaded.footprint_srid == 3577
    assert loaded.footprint_geometry.area == pytest.approx(o.footprint_geometry.area)

    # Now modify and store again: the update should replace the stored values.
    o.dataset_count = 4321
    o.newest_dataset_creation_time = datetime(2018, 2, 2, 2, 2, 2, tzinfo=tz.tzutc())
    # Sleep so the second generation timestamp is measurably later than the first.
    time.sleep(1)
    summary_store._put(product_name, 2017, None, None, o)
    assert o.summary_gen_time != original_gen_time

    loaded = summary_store.get(product_name, 2017, None, None)
    assert loaded.dataset_count == 4321
    assert loaded.newest_dataset_creation_time == datetime(
        2018, 2, 2, 2, 2, 2, tzinfo=tz.tzutc()
    )
    assert (
        loaded.summary_gen_time != original_gen_time
    ), "An update should update the generation time"
def test_get_null(summary_store: SummaryStore):
    """
    An area with nothing generated should come back as null.

    (It's important for us to distinguish between an area with zero datasets
    and an area where the summary/extent has not been generated.)
    """
    assert summary_store.get("some_product", 2019, 4, None) is None
def eo3_index(module_dea_index: Index, dataset_loader):
    """Pytest fixture: the module index with one EO3 level-1 and one ARD dataset loaded."""
    for product_name, dataset_doc in (
        ("usgs_ls5t_level1_1", TEST_EO3_DATASET_L1),
        ("ga_ls5t_ard_3", TEST_EO3_DATASET_ARD),
    ):
        assert dataset_loader(product_name, dataset_doc) == 1

    # We need postgis and some support tables (eg. srid lookup).
    SummaryStore.create(module_dea_index).init()

    return module_dea_index
def eo3_index(module_dea_index: Index, dataset_loader):
    """Pytest fixture: the module index with one EO3 level-1 and one ARD dataset loaded."""
    level1_doc = (
        TEST_DATA_DIR / "LT05_L1TP_113081_19880330_20170209_01_T1.odc-metadata.yaml"
    )
    ard_doc = (
        TEST_DATA_DIR
        / "ga_ls5t_ard_3-1-20200605_113081_1988-03-30_final.odc-metadata.yaml"
    )

    assert dataset_loader("usgs_ls5t_level1_1", level1_doc) == 1
    assert dataset_loader("ga_ls5t_ard_3", ard_doc) == 1

    # We need postgis and some support tables (eg. srid lookup).
    SummaryStore.create(module_dea_index).init()

    return module_dea_index