def check_open_with_dc(index):
    """Smoke-test ``Datacube.load`` and the product/measurement listings.

    Every load targets the ``ls5_nbar_albers`` product; the checks only
    require that something non-empty comes back.

    :param index: index handed to the ``Datacube`` constructor.
    """
    from datacube.api.core import Datacube

    nbar = 'ls5_nbar_albers'
    cube = Datacube(index=index)

    # Stacked loads: one restricted by variable, one by lat/lon window.
    stacked = cube.load(product=nbar, variables=['blue'], stack='variable')
    assert stacked.shape

    stacked = cube.load(product=nbar, latitude=(-34, -35), longitude=(149, 150),
                        stack='variable')
    assert stacked.shape

    # Unstacked loads: the 'blue' variable must hold at least one value.
    unstacked = cube.load(product=nbar, variables=['blue'])
    assert unstacked['blue'].size

    unstacked = cube.load(product=nbar, latitude=(-35.2, -35.3),
                          longitude=(149.1, 149.2))
    assert unstacked['blue'].size

    # Grouping by solar day only has to complete; the result is not inspected.
    stacked = cube.load(product=nbar, latitude=(-34, -35), longitude=(149, 150),
                        variables=['blue'], group_by='solar_day')

    # Both expected products must be present in the product table.
    product_table = cube.list_products()
    assert len(product_table)
    for expected_name in ('ls5_nbar_albers', 'ls5_pq_albers'):
        matching = product_table[product_table['name'].isin([expected_name])]
        assert len(matching)

    assert len(cube.list_measurements())
def check_legacy_open(index):
    """Check ``load_data`` against ``load`` for a single, duplicated dataset.

    Loads one time slice of ``ls5_nbar_albers`` via ``Datacube.load``, then
    reloads it through ``group_datasets``/``load_data`` with the dataset listed
    twice, eagerly and with dask chunks, and compares the results.

    :param index: index handed to the ``Datacube`` constructor.
    """
    from datacube.api.core import Datacube

    product_name = 'ls5_nbar_albers'
    timestamp = '1992-03-23T23:14:25.500000'
    cube = Datacube(index=index)

    reference = cube.load(product=product_name,
                          measurements=['blue'],
                          time=timestamp,
                          use_threads=True)
    assert reference['blue'].shape[0] == 1
    assert (reference.blue != -999).any()

    found = cube.find_datasets(product=product_name, time=timestamp)
    assert len(found) == 1

    # Duplicate the dataset so the load has to fuse two sources.
    doubled = found * 2
    grouped = cube.group_datasets(doubled, query_group_by('time'))

    geobox = reference.geobox
    blue_only = [doubled[0].type.measurements['blue']]

    fused = cube.load_data(grouped, geobox, blue_only)
    assert (fused == reference).all()

    with rasterio.Env():
        fused_lazy = cube.load_data(grouped, geobox, blue_only,
                                    dask_chunks={'time': 1})
        # A dask-backed array exposes a non-empty task graph.
        assert fused_lazy['blue'].data.dask
        assert fused_lazy.blue[0, :, :].equals(fused.blue[0, :, :])
def test_end_to_end_multitime(clirunner, index, product_def, original_data):
    """Index every measurement for every GEDI product ID and verify loading.

    The first measurement gets the full content checks; the remaining ones
    only get the simple check.
    """
    cube = Datacube(index=index)

    # Register the GEDI dataset types before any dataset is indexed.
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    yaml_template = product_def.index_yaml
    for position, measurement in enumerate(product_def.measurements):
        # Index the dataset document of each product ID for this measurement.
        for product_id in GEDI_PRODUCT_IDS:
            index_yaml = str(yaml_template).format(
                product_id=product_id.pid,
                measurement=measurement,
            )
            clirunner(["-v", "dataset", "add", str(index_yaml)])

        if position != 0:
            check_open_with_dc_simple(
                cube, product_def, GEDI_PRODUCT_IDS, measurement)
            continue

        # First measurement only: compare contents for all product IDs ...
        check_open_with_dc_contents(
            cube, product_def, GEDI_PRODUCT_IDS, measurement, original_data)
        # ... and exercise the load-via-datasets path for the first ID only.
        check_load_via_dss(
            cube, product_def, GEDI_PRODUCT_IDS[:1], measurement, original_data)
def test_indexing(clirunner, index, product_def):
    """Run the dataset add/update CLI commands, then do a simple load check.

    Uses the first GEDI product ID and the first measurement of
    ``product_def``. The dry-run, repeated add and update calls are expected
    to be no-ops on top of the one real ``dataset add``.
    """
    product_id = GEDI_PRODUCT_IDS[0]
    measurement = product_def.measurements[0]
    yaml_path = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    def dataset_cli(*arguments):
        # Every dataset sub-command shares the verbose flag and the document.
        clirunner(["-v", "dataset"] + list(arguments) + [yaml_path])

    # Register the GEDI dataset types.
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    # Dry run first (extra coverage), then the real indexing.
    dataset_cli("add", "--dry-run")
    dataset_cli("add")
    # Already indexed, so this is a no-op, but goes through ignore-lineage.
    dataset_cli("add", "--confirm-ignore-lineage")

    for policy in ["archive", "forget", "keep"]:
        # No-op update in dry-run mode.
        dataset_cli("update", "--dry-run", "--location-policy", policy)
        # Real update where nothing needs to change.
        dataset_cli("update", "--location-policy", policy)

    cube = Datacube(index=index)
    check_open_with_dc_simple(cube, product_def, [product_id], measurement)
def ingest_cmd(index, config, dry_run, executor):
    """Ingest datasets of a source type into a derived output type.

    Reads the ingestion configuration, registers the output dataset type,
    writes one product per outstanding task, then creates an ``archive``
    directory beside the output and compresses the ``cache`` directory.

    :param index: index used to look up/add dataset types and to build the
        ``Datacube``.
    :param config: path to the ingestion configuration document.
    :param dry_run: accepted for interface compatibility.
        NOTE(review): this flag is never consulted in the body - confirm
        whether a dry run is supposed to skip the writes.
    :param executor: executor forwarded to ``do_work``.
    :return: ``None``. Returns early, after logging an error, when the
        configured source type is not in the index.
    """
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist", config['source_type'])
        # Without a source type there is nothing to derive or ingest; stop
        # here instead of failing later on a ``None`` source type.
        return

    output_type = morph_dataset_type(source_type, config)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(
        Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        """Load one tile from ``sources`` and write it as the output type."""
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        # The file name embeds the tile index and the first/last source times.
        time_format = '%Y%m%d%H%M%S%f'
        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(
                sources.time.values[0]).strftime(time_format),
            end_time=to_datetime(
                sources.time.values[-1]).strftime(time_format))

        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)

    location = str(Path(config['location']))
    files_path = location + "/cache"

    # Create the archive directory without going through a shell: the old
    # ``os.system("mkdir ...")`` broke on paths with spaces or shell
    # metacharacters, and its ``isfile`` guard never matched a directory.
    archive_dir = os.path.join(location, "archive")
    if not os.path.exists(archive_dir):
        os.makedirs(archive_dir)

    print("Compressing files")
    compress(files_path)
def check_open_with_api(index):
    """Load ``ls5_nbar_albers`` over a fixed 200x200 geobox and check shape.

    Goes through ``product_observations`` -> ``product_sources`` ->
    ``product_data`` and expects a single time slice of 200x200 pixels in
    the ``blue`` band.

    :param index: index handed to the ``Datacube`` constructor.
    """
    from datacube.api.core import Datacube

    def center_time_of(ds):
        # Grouping key: the dataset's centre time.
        return ds.center_time

    dc = Datacube(index=index)

    product_name = 'ls5_nbar_albers'
    product_type = dc.index.datasets.types.get_by_name(product_name)

    # 200x200 pixels at 25 units per pixel in EPSG:3577.
    transform = Affine(25, 0.0, 1500000, 0.0, -25, -3900000)
    geobox = GeoBox(200, 200, transform, CRS('EPSG:3577'))

    found = dc.product_observations(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    grouped = dc.product_sources(found,
                                 center_time_of,
                                 'time',
                                 'seconds since 1970-01-01 00:00:00')
    loaded = dc.product_data(grouped, geobox,
                             product_type.measurements.values())

    assert loaded.blue.shape == (1, 200, 200)
def test_indexing_with_spectral_map(clirunner, index, dataset_types):
    """Index one 3D GEDI dataset using ``dataset_types`` and do a simple load."""
    product_def = GEDI_PRODUCTS["3D"]
    first_id = GEDI_PRODUCT_IDS[0]
    first_measurement = product_def.measurements[0]

    yaml_path = str(product_def.index_yaml).format(
        product_id=first_id.pid,
        measurement=first_measurement,
    )

    # Register the dataset types, then index the single dataset document.
    for command in (
        ["-v", "product", "add", str(dataset_types)],
        ["-v", "dataset", "add", str(yaml_path)],
    ):
        clirunner(command)

    cube = Datacube(index=index)
    check_open_with_dc_simple(cube, product_def, [first_id], first_measurement)
def check_load_via_dss(index):
    """Check that loading by explicit datasets matches loading by product.

    Loads the ``blue`` measurement of ``ls5_nbar_albers`` three ways - by
    product name, from a list of datasets, and from an iterator over the same
    datasets - and requires identical values. Finally checks that ``load``
    with neither a product nor datasets raises ``ValueError``.

    :param index: index handed to the ``Datacube`` constructor.
    """
    dc = Datacube(index=index)

    dss = dc.find_datasets(product='ls5_nbar_albers')
    assert len(dss) > 0

    # Reference load by product name, compared against a list of datasets.
    xx1 = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    xx2 = dc.load(datasets=dss, measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    # Same datasets supplied as a one-shot iterator. Inspect the freshly
    # loaded result itself; the earlier version re-checked ``xx1`` here.
    xx2 = dc.load(datasets=iter(dss), measurements=['blue'])
    assert xx2.blue.shape
    assert (xx2.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    # Neither a product nor datasets given: load must refuse.
    with pytest.raises(ValueError):
        dc.load(measurements=['blue'])
def check_open_with_grid_workflow(index):
    """Exercise ``GridWorkflow`` cell/tile listing and loading for LBG_CELL.

    Loads one tile from ``list_tiles`` and the stacked tile from
    ``list_tile_stacks``; each is loaded once restricted to ``blue`` and once
    with every measurement.

    :param index: index handed to the ``Datacube`` constructor.
    """
    from datacube.api.core import Datacube
    from datacube.api.grid_workflow import GridWorkflow

    product_name = 'ls5_nbar_albers'
    cube = Datacube(index=index)
    product_type = cube.index.datasets.types.get_by_name(product_name)
    workflow = GridWorkflow(cube, product_type.grid_spec)

    def assert_tile_loads(tile):
        # Restricted load must return data for 'blue' ...
        blue_only = workflow.load(LBG_CELL, tile, measurements=['blue'])
        assert blue_only['blue'].size
        # ... and an unrestricted load must contain every expected band.
        everything = workflow.load(LBG_CELL, tile)
        assert all(band in everything
                   for band in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])

    assert LBG_CELL in workflow.list_cells(product=product_name)

    # Individual tiles: take an arbitrary (key, tile) entry of the cell.
    per_cell = workflow.list_tiles(product=product_name)
    assert per_cell
    assert per_cell[LBG_CELL]
    _, one_tile = per_cell[LBG_CELL].popitem()
    assert_tile_loads(one_tile)

    # Tile stacks: the cell maps straight to a single stacked tile.
    stacks = workflow.list_tile_stacks(product=product_name)
    assert stacks
    assert stacks[LBG_CELL]
    assert_tile_loads(stacks[LBG_CELL])
def with_datacube(index, *args, **kwargs):
    """Call ``f`` with a ``Datacube`` built from ``index`` as first argument.

    ``f`` is not defined in this block; presumably it is bound by an
    enclosing decorator scope - confirm against the surrounding code.
    Remaining positional and keyword arguments are forwarded unchanged.
    """
    cube = Datacube(index=index)
    return f(cube, *args, **kwargs)
def check_open_with_dc(index):
    """Exercise ``Datacube.load`` on ``ls5_nbar_albers`` in many configurations.

    Covers stacked and unstacked loads, time/point/area queries, dask-backed
    loads compared against eager ones, ``like=``, ``align=``, ``group_by``,
    the product/measurement listings, and a comparison of resampling methods.

    :param index: index handed to the ``Datacube`` constructor.
    """
    from datacube.api.core import Datacube

    nbar = 'ls5_nbar_albers'
    cube = Datacube(index=index)

    # --- stacked, single measurement -------------------------------------
    stacked = cube.load(product=nbar, measurements=['blue'], stack='variable')
    assert stacked.shape
    assert (stacked != -999).any()

    # --- exact timestamp: exactly one time slice --------------------------
    one_time = cube.load(product=nbar, measurements=['blue'],
                         time='1992-03-23T23:14:25.500000')
    assert one_time['blue'].shape[0] == 1
    assert (one_time.blue != -999).any()

    # --- scalar lat/lon: a 1x1 spatial footprint --------------------------
    one_pixel = cube.load(product=nbar, measurements=['blue'],
                          latitude=-35.3, longitude=149.1)
    assert one_pixel['blue'].shape[1:] == (1, 1)
    assert (one_pixel.blue != -999).any()

    # --- stacked area load, eager then dask-backed ------------------------
    stacked = cube.load(product=nbar, latitude=(-35, -36), longitude=(149, 150),
                        stack='variable')
    assert stacked.ndim == 4
    assert 'variable' in stacked.dims
    assert (stacked != -999).any()

    with rasterio.Env():
        lazy_stacked = cube.load(product=nbar,
                                 latitude=(-35, -36), longitude=(149, 150),
                                 stack='variable',
                                 dask_chunks={'time': 1, 'x': 1000, 'y': 1000})
        assert lazy_stacked.data.dask
        assert lazy_stacked.ndim == stacked.ndim
        assert 'variable' in lazy_stacked.dims
        # Same key as [1, :2, 950:1050, 950:1050]; the window straddles the
        # 1000-pixel chunk edge requested above.
        window = (1, slice(None, 2), slice(950, 1050), slice(950, 1050))
        assert lazy_stacked[window].equals(stacked[window])

    # --- unstacked loads, eager then dask-backed --------------------------
    eager = cube.load(product=nbar, measurements=['blue'])
    assert eager['blue'].size

    eager = cube.load(product=nbar, latitude=(-35.2, -35.3),
                      longitude=(149.1, 149.2))
    assert eager['blue'].size

    with rasterio.Env():
        lazy = cube.load(product=nbar, latitude=(-35.2, -35.3),
                         longitude=(149.1, 149.2), dask_chunks={'time': 1})
        assert lazy['blue'].data.dask
        assert lazy.blue[:2, :100, :100].equals(eager.blue[:2, :100, :100])
        subset = dict(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050))
        assert lazy.isel(**subset).equals(eager.isel(**subset))

    # --- like=: reuse the grid of an existing result ----------------------
    regridded = cube.load(product=nbar, measurements=['blue'], like=eager)
    assert (eager.blue == regridded.blue).all()

    # --- group_by: only has to complete, the result is not inspected ------
    stacked = cube.load(product=nbar, latitude=(-35, -36), longitude=(149, 150),
                        measurements=['blue'], group_by='solar_day')

    # --- align=: check the grid origin offsets modulo the pixel size ------
    aligned = cube.load(product=nbar, latitude=(-35.2, -35.3),
                        longitude=(149.1, 149.2), align=(5, 20))
    transform = aligned.geobox.affine
    assert transform.f % abs(transform.e) == 5
    assert transform.c % abs(transform.a) == 20
    regridded = cube.load(product=nbar, measurements=['blue'], like=aligned)
    assert (aligned.blue == regridded.blue).all()

    # --- listings ----------------------------------------------------------
    product_table = cube.list_products()
    assert len(product_table)
    for expected_name in ('ls5_nbar_albers', 'ls5_pq_albers'):
        assert len(product_table[product_table['name'].isin([expected_name])])

    assert len(cube.list_measurements())

    # --- resampling methods -------------------------------------------------
    def largest_step(band):
        """Sum the largest neighbour-to-neighbour jump along the middle row
        and the middle column of a 2D array."""
        mid_row = band.shape[0] // 2
        along_row = int(
            abs(band[mid_row, :-1].data - band[mid_row, 1:].data).max())

        mid_col = band.shape[1] // 2
        along_col = int(
            abs(band[:-1, mid_col].data - band[1:, mid_col].data).max())
        return along_row + along_col

    methods = ['nearest', 'cubic', 'bilinear', 'cubic_spline', 'lanczos', 'average']
    step_by_method = {}
    for method in methods:
        resampled = cube.load(product=nbar, measurements=['blue'],
                              latitude=(-35.28, -35.285),
                              longitude=(149.15, 149.155),
                              output_crs='EPSG:4326',
                              resolution=(-0.0000125, 0.0000125),
                              resampling=method)
        step_by_method[method] = largest_step(resampled.blue.isel(time=0))

    # The expected ordering of the jump metric between methods.
    assert step_by_method['cubic_spline'] < step_by_method['nearest']
    assert step_by_method['lanczos'] < step_by_method['average']
def with_index(driver_manager, *args, **kwargs):
    """Call ``f`` with a ``Datacube`` built from ``driver_manager``.

    ``f`` is not defined in this block; presumably it is bound by an
    enclosing decorator scope - confirm against the surrounding code.
    Remaining positional and keyword arguments are forwarded unchanged.
    """
    cube = Datacube(driver_manager=driver_manager)
    return f(cube, *args, **kwargs)
def test_end_to_end(clirunner, index, testdata_dir, ingest_configs, datacube_env_name):
    """Index the LBG NBAR and PQ datasets, ingest both, then verify loading.

    Steps: add the LS5 dataset types, index the two datasets (with dry-run,
    repeat and update variations that should be no-ops), ingest with the
    ``ls5_nbar_albers`` and ``ls5_pq_albers`` configurations, then run the
    ``Datacube``, ``GridWorkflow`` and load-via-datasets checks.
    """
    lbg_dir = testdata_dir / 'lbg'
    dataset_paths = [str(lbg_dir / LBG_NBAR), str(lbg_dir / LBG_PQ)]

    nbar_ingest_config = testdata_dir / ingest_configs['ls5_nbar_albers']
    pq_ingest_config = testdata_dir / ingest_configs['ls5_pq_albers']

    def dataset_cli(*arguments):
        # Every dataset sub-command is verbose and ends with both documents.
        clirunner(['-v', 'dataset'] + list(arguments) + dataset_paths)

    # Register the LS5 dataset types.
    clirunner(['-v', 'product', 'add', str(LS5_DATASET_TYPES)])

    # Dry run first (extra coverage), then the real indexing.
    dataset_cli('add', '--dry-run')
    dataset_cli('add')
    # Already indexed, so this is a no-op, but goes through ignore-lineage.
    dataset_cli('add', '--confirm-ignore-lineage')

    for policy in ['archive', 'forget', 'keep']:
        # No-op update in dry-run mode.
        dataset_cli('update', '--dry-run', '--location-policy', policy)
        # Real update where nothing needs to change.
        dataset_cli('update', '--location-policy', policy)

    # TODO: test location update
    # 1. Make a copy of a file
    # 2. Call dataset update with archive/forget
    # 3. Check location

    # Ingest NBAR first, then PQ.
    for ingest_config in (nbar_ingest_config, pq_ingest_config):
        clirunner(['-v', 'ingest', '-c', str(ingest_config)])

    dc = Datacube(index=index)
    assert isinstance(str(dc), str)
    assert isinstance(repr(dc), str)

    # Searching without a product must be rejected.
    with pytest.raises(ValueError):
        dc.find_datasets(time='2019')

    check_open_with_dc(index)
    check_open_with_grid_workflow(index)
    check_load_via_dss(index)