def test_band_info():
    """Check the attributes exposed by ``BandInfo`` and the inputs it rejects.

    Covers: a regular band lookup, an unknown band name, a band removed from
    the dataset metadata document, datasets with empty/missing ``uris``,
    a dataset with ``format=None`` and a dataset whose uri has no scheme.
    """
    bands = [dict(name=n, dtype='uint8', units='K', nodata=33, path=n + '.tiff')
             for n in 'a b c'.split(' ')]

    ds = mk_sample_dataset(bands,
                           uri='file:///tmp/datataset.yml',
                           format='GeoTIFF')

    binfo = BandInfo(ds, 'b')
    assert binfo.name == 'b'
    assert binfo.band is None
    assert binfo.layer is None
    assert binfo.dtype == 'uint8'
    assert binfo.transform is None
    assert binfo.crs is None
    assert binfo.units == 'K'
    assert binfo.nodata == 33
    assert binfo.uri == 'file:///tmp/b.tiff'
    assert binfo.format == ds.format
    assert binfo.driver_data is None
    assert binfo.uri_scheme == 'file'

    with pytest.raises(ValueError):
        BandInfo(ds, 'no_such_band')

    # Check case where dataset is missing band that is present in the product
    del ds.metadata_doc['image']['bands']['c']
    with pytest.raises(ValueError):
        BandInfo(ds, 'c')

    ds.uris = []
    with pytest.raises(ValueError):
        BandInfo(ds, 'a')

    ds.uris = None
    with pytest.raises(ValueError):
        BandInfo(ds, 'a')

    ds_none_fmt = mk_sample_dataset(bands,
                                    uri='file:///tmp/datataset.yml',
                                    format=None)
    assert ds_none_fmt.format is None
    assert BandInfo(ds_none_fmt, 'a').format == ''

    ds = mk_sample_dataset(bands, uri='/not/a/uri')
    band = BandInfo(ds, 'a')
    # Compare by value: whether two equal strings are the same object is an
    # interpreter detail, so ``is ''`` is not a reliable check.
    assert band.uri_scheme == ''
def test_dataset_basics():
    """Exercise equality and string conversion of a sample dataset, then the
    accessors of a dataset created with neither a uri nor a geobox."""
    sample = mk_sample_dataset([{'name': 'a'}])

    # equality against itself and against an unrelated type
    assert sample == sample
    assert sample != "33"
    assert (sample == "33") is False
    assert str(sample) == repr(sample)

    bare = mk_sample_dataset([{'name': 'a'}], uri=None, geobox=None)

    # no uri supplied
    assert bare.uris == []
    assert bare.uri_scheme == ''

    # no geobox supplied
    assert bare.crs is None
    assert bare.bounds is None
    assert bare.extent is None
    assert bare.transform is None
def test_group_datasets_by_time():
    """Group three datasets by time and expect two time slices: the first
    holding two datasets, the second holding one."""
    measurements = [{'name': 'a'}]

    # Identical instant; only the first one carries an explicit UTC marker
    utc_ds = mk_sample_dataset(measurements, timestamp="2019-01-01T23:24:00Z")
    naive_ds = mk_sample_dataset(measurements, timestamp="2019-01-01T23:24:00")

    # Same wall-clock reading in another timezone, i.e. a later instant
    offset_ds = mk_sample_dataset(measurements, timestamp="2019-01-01T23:24:00-1")

    assert utc_ds.center_time.tzinfo is not None
    assert naive_ds.center_time.tzinfo is None
    assert offset_ds.center_time.tzinfo is not None

    grouped = Datacube.group_datasets([utc_ds, naive_ds, offset_ds], 'time')

    assert grouped.time.shape == (2,)
    assert len(grouped.data[0]) == 2
    assert len(grouped.data[1]) == 1
def test_new_xr_load(data_folder):
    """Load two bands of ``test.tif`` through ``xr_load`` and compare them
    with the pixels returned by ``rio_slurp`` for the same file.

    :param data_folder: folder containing ``test.tif`` (supplied by the caller)
    """
    base = "file://" + str(data_folder) + "/metadata.yml"

    rdr = mk_rio_driver()
    assert rdr is not None

    collected = []

    def band_info_collector(bands, ctx):
        # remember every band handed to the callback
        collected.extend(bands)

    tee_new_load_context(rdr, band_info_collector)

    band_defs = [
        {'name': 'a', 'path': 'test.tif'},
        {'name': 'b', 'band': 2, 'path': 'test.tif'},
    ]

    ds = mk_sample_dataset(band_defs, base)
    sources = Datacube.group_datasets([ds], 'time')

    pixels, meta = rio_slurp(str(data_folder) + '/test.tif')
    measurements = [ds.type.measurements[name] for name in ('a', 'b')]

    loaded, _ = xr_load(sources, meta.gbox, measurements, rdr)

    assert len(collected) == 2

    assert pixels[0].shape == loaded.a.isel(time=0).shape
    assert pixels[1].shape == loaded.b.isel(time=0).shape

    np.testing.assert_array_equal(pixels[0], loaded.a.values[0])
    np.testing.assert_array_equal(pixels[1], loaded.b.values[0])
def gen_tiff_dataset(bands, base_folder, prefix='', timestamp='2018-07-19', **kwargs):
    """
    Write every band to ``<prefix><name>.tiff`` inside ``base_folder`` using
    ``write_gtiff`` and build a sample dataset that refers to those files.

    A single band (anything that is not a ``Sequence``) is accepted as well.

    each band:
      .name    - string
      .values  - ndarray
      .nodata  - numeric|None

    Extra keyword arguments are forwarded to ``write_gtiff``.

    :returns:  (Dataset, GeoBox) -- the geobox is taken from the last file
               written, or is ``None`` when no band was supplied
    """
    band_list = bands if isinstance(bands, Sequence) else (bands, )

    # dump each array to disk, collecting a matching measurement definition
    gbox = None
    measurements = []
    for band in band_list:
        name = band.name
        fname = prefix + name + '.tiff'
        meta = write_gtiff(base_folder / fname,
                           band.values,
                           nodata=band.nodata,
                           overwrite=True,
                           **kwargs)
        gbox = meta.gbox
        measurements.append({
            'name': name,
            'path': fname,
            'layer': 1,
            'dtype': meta.dtype,
        })

    metadata_path = Path(base_folder / 'metadata.yaml')
    uri = metadata_path.absolute().as_uri()

    return mk_sample_dataset(measurements, uri=uri, timestamp=timestamp), gbox
def test_band_info():
    """Check the attributes ``BandInfo`` reports for a band of a sample
    dataset, and that unknown bands or missing uris raise ``ValueError``."""
    band_defs = []
    for band_name in ('a', 'b', 'c'):
        band_defs.append({
            'name': band_name,
            'dtype': 'uint8',
            'units': 'K',
            'nodata': 33,
            'path': band_name + '.tiff',
        })

    dataset = mk_sample_dataset(band_defs,
                                uri='file:///tmp/datataset.yml',
                                format='GeoTIFF')

    info = BandInfo(dataset, 'b')
    assert info.name == 'b'
    assert info.band is None
    assert info.layer is None
    assert info.dtype == 'uint8'
    assert info.transform is None
    assert info.crs is None
    assert info.units == 'K'
    assert info.nodata == 33
    assert info.center_time == dataset.center_time
    assert info.uri == 'file:///tmp/b.tiff'
    assert info.format == dataset.format
    assert info.driver_data is None
    assert info.uri_scheme == 'file'

    with pytest.raises(ValueError):
        BandInfo(dataset, 'no_such_band')

    # neither an empty list of uris nor ``None`` is acceptable
    for missing_uris in ([], None):
        dataset.uris = missing_uris
        with pytest.raises(ValueError):
            BandInfo(dataset, 'a')
def test_new_datasource_fallback():
    """``new_datasource`` should hand back a ``RasterDatasetDataSource`` for a
    ``file://`` dataset, both with format ``'GeoTiff'`` and with format ``None``."""
    band_defs = [{'name': 'green', 'path': ''}]

    dataset = mk_sample_dataset(band_defs, 'file:///foo', format='GeoTiff')
    assert dataset.uri_scheme == 'file'

    source = new_datasource(BandInfo(dataset, 'green'))
    assert source is not None
    assert isinstance(source, RasterDatasetDataSource)

    # a dataset whose format is None must work too
    no_format_ds = mk_sample_dataset(band_defs, 'file:///file', format=None)
    source = new_datasource(BandInfo(no_format_ds, 'green'))
    assert source is not None
    assert isinstance(source, RasterDatasetDataSource)
def test_native_geobox_ingested():
    """``native_geobox`` should return the tile geobox of a dataset whose
    product has a grid spec, and raise ``ValueError`` for a buffered geobox."""
    from datacube.testutils.io import native_geobox
    from datacube.testutils.geom import AlbersGS

    tile_gbox = AlbersGS.tile_geobox((15, -40))
    grid_spec_opts = {'with_grid_spec': True}

    single_tile_ds = mk_sample_dataset([{'name': 'a'}],
                                       geobox=tile_gbox,
                                       product_opts=grid_spec_opts)
    assert native_geobox(single_tile_ds) == tile_gbox

    # a dataset spanning more than one tile has to be rejected
    multi_tile_ds = mk_sample_dataset([{'name': 'a'}],
                                      geobox=tile_gbox.buffered(10, 10),
                                      product_opts=grid_spec_opts)
    with pytest.raises(ValueError):
        native_geobox(multi_tile_ds)
def test_dataset_measurement_paths():
    """``measurement_paths`` should map each band to ``file:///tmp/<band>.tiff``
    for a dataset located at ``file:///tmp/datataset.yml``."""
    fmt = 'GeoTiff'
    band_defs = [{'name': band_name, 'path': band_name + '.tiff'}
                 for band_name in ('a', 'b', 'c')]

    dataset = mk_sample_dataset(band_defs,
                                uri='file:///tmp/datataset.yml',
                                format=fmt)

    assert dataset.uri_scheme == 'file'
    assert dataset.format == fmt

    resolved = measurement_paths(dataset)

    for band_name, band_uri in resolved.items():
        expected = 'file:///tmp/' + band_name + '.tiff'
        assert band_uri == expected
def test_hdf5_lock_release_on_failure():
    """Opening a NetCDF source at a non-existent location must raise
    ``OSError`` and must not leave ``_HDF5_LOCK`` owned afterwards."""
    from datacube.storage._rio import RasterDatasetDataSource, _HDF5_LOCK
    from datacube.storage import BandInfo

    band = dict(name='xx',
                layer='xx',
                dtype='uint8',
                units='K',
                nodata=33)

    ds = mk_sample_dataset(
        [band],
        uri='file:///tmp/this_probably_doesnot_exist_37237827513/xx.nc',
        format=NetCDF)
    src = RasterDatasetDataSource(BandInfo(ds, 'xx'))

    with pytest.raises(OSError):
        with src.open():
            # Reaching this line means open() unexpectedly succeeded. Raise
            # explicitly so the message is reported and the check is not
            # stripped when running with ``-O``.
            raise AssertionError("Did not expect to get here")

    assert not _HDF5_LOCK._is_owned()
def test_new_datasource_s3():
    """The S3 reader driver should produce an ``S3DataSource`` for a dataset
    using the S3 protocol and format. Skipped when the s3lio module is
    not importable."""
    pytest.importorskip('datacube.drivers.s3.storage.s3aio.s3lio')

    from datacube.drivers.s3 import driver as s3_driver
    from datacube.drivers.s3.datasource import S3DataSource

    band_defs = [{'name': 'green', 'path': ''}]
    dataset = mk_sample_dataset(band_defs,
                                s3_driver.PROTOCOL + ':///foo',
                                format=s3_driver.FORMAT)

    # attach stand-in S3 metadata for the only band
    fake_s3_dataset = S3_dataset(macro_shape=(10, 12), numpy_type='float32')
    dataset.s3_metadata = {'green': {'s3_dataset': fake_s3_dataset}}

    assert dataset.format == s3_driver.FORMAT
    assert dataset.uri_scheme == s3_driver.PROTOCOL

    reader = s3_driver.reader_driver_init()
    source = reader.new_datasource(BandInfo(dataset, 'green'))
    assert source is not None
    assert isinstance(source, S3DataSource)
def mk_band(name: str,
            base_uri: str,
            path: str = '',
            format: str = GeoTIFF,  # pylint: disable=redefined-builtin
            **extras) -> BandInfo:
    """
    Build a one-band sample dataset and return the ``BandInfo`` for that band.

    **extras**:
       layer, band, nodata, dtype, units, aliases -- moved into the band
       definition; anything else is passed on to ``mk_sample_dataset``
    """
    band_keys = ('path', 'layer', 'band', 'nodata', 'dtype', 'units', 'aliases')

    # pull band-level options out of extras, leaving the rest for the dataset
    band_opts = {}
    for key in band_keys:
        if key in extras:
            band_opts[key] = extras.pop(key)

    band_def = dict(name=name, path=path, **band_opts)
    dataset = mk_sample_dataset([band_def], base_uri, format=format, **extras)
    return BandInfo(dataset, name)
def test_dataset_measurement_paths():
    """Check ``local_uri`` and ``measurement_paths`` for a ``file://`` dataset,
    then that clearing ``uris`` yields no local uri and a ``ValueError``."""
    fmt = 'GeoTiff'
    band_defs = [{'name': band_name, 'path': band_name + '.tiff'}
                 for band_name in ('a', 'b', 'c')]

    dataset = mk_sample_dataset(band_defs,
                                uri='file:///tmp/datataset.yml',
                                format=fmt)

    assert dataset.local_uri == dataset.uris[0]
    assert dataset.uri_scheme == 'file'
    assert dataset.format == fmt

    resolved = measurement_paths(dataset)

    for band_name, band_uri in resolved.items():
        expected = 'file:///tmp/' + band_name + '.tiff'
        assert band_uri == expected

    # without any uris there is nothing to resolve paths against
    dataset.uris = None
    assert dataset.local_uri is None
    with pytest.raises(ValueError):
        measurement_paths(dataset)
def test_dataset_basics():
    """Exercise equality and string conversion of a sample dataset."""
    sample = mk_sample_dataset([{'name': 'a'}])

    # equal to itself, different from an unrelated type
    assert sample == sample
    assert sample != "33"
    assert (sample == "33") is False

    assert str(sample) == repr(sample)
def sample_ds(sample_geobox):
    """Yield a sample dataset with a single ``red`` band, created with
    ``geobox=sample_geobox``."""
    dataset = mk_sample_dataset([{'name': 'red'}], geobox=sample_geobox)
    yield dataset