def test_write_dataset_with_time_dimension_to_netcdf(tmpnetcdf_filename):
    """Round-trip a dataset with a time dimension through write_dataset_to_netcdf.

    Verifies that variable data, global attributes and per-variable attributes
    survive the write, that writing over an existing file raises RuntimeError,
    and that on re-read the CRS variable is a coordinate (not a data variable)
    and the time coordinate is preserved.
    """
    xx = mk_sample_xr_dataset(name='B10', time='2020-01-01')
    assert 'time' in xx.coords
    # 'units' must be absent so the writer is free to encode time itself
    assert 'units' not in xx.time.attrs

    write_dataset_to_netcdf(xx, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == xx['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'

    # Writing to an already-existing file must fail loudly
    with pytest.raises(RuntimeError):
        write_dataset_to_netcdf(xx, tmpnetcdf_filename)

    # Check grid_mapping is a coordinate.  Use a context manager so the file
    # handle is released (the original leaked it via a bare open_dataset).
    with xr.open_dataset(tmpnetcdf_filename) as yy:
        assert crs_var in yy.coords
        assert crs_var not in yy.data_vars
        assert 'time' in yy.coords
def test_write_dataset_to_netcdf(tmpnetcdf_filename, odc_style_xr_dataset):
    """Round-trip an ODC-style dataset through write_dataset_to_netcdf.

    Verifies data, global attributes and per-variable attributes survive the
    write, that rewriting an existing file raises RuntimeError, and that the
    CRS variable comes back as a coordinate rather than a data variable.
    """
    write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params={'B10': {'attrs': {'abc': 'xyz'}}})

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        nco.set_auto_mask(False)
        assert 'B10' in nco.variables
        var = nco.variables['B10']
        assert (var[:] == odc_style_xr_dataset['B10'].values).all()

        assert 'foo' in nco.ncattrs()
        assert nco.getncattr('foo') == 'bar'

        assert 'abc' in var.ncattrs()
        assert var.getncattr('abc') == 'xyz'

    # Writing to an already-existing file must fail loudly
    with pytest.raises(RuntimeError):
        write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename)

    # Check grid_mapping is a coordinate.  Use a context manager so the file
    # handle is released (the original leaked it via a bare open_dataset).
    with xr.open_dataset(tmpnetcdf_filename) as xx:
        assert crs_var in xx.coords
        assert crs_var not in xx.data_vars
def _do_fc_task(config, task):
    """
    Load data, run FC algorithm, attach metadata, and write output.

    :param dict config: Config object
    :param dict task: Dictionary of tasks
    :return: Dataset objects representing the generated data that can be added to the index
    :rtype: list(datacube.model.Dataset)
    """
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    output_product = config['fc_product']

    file_path = Path(task['filename_dataset'])
    uri, band_uris = calc_uris(file_path, variable_params)
    output_measurements = output_product.measurements.values()

    nbart = io.native_load(task['dataset'], measurements=config['load_bands'])
    # Optionally rename input bands to the names the FC algorithm expects
    if config['band_mapping'] is not None:
        nbart = nbart.rename(config['band_mapping'])

    fc_dataset = run_fc(nbart, output_measurements,
                        config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        # Build one indexable Dataset record per time slice.
        assert sources
        dataset = make_dataset(product=output_product,
                               sources=sources,
                               extent=nbart.geobox.extent,
                               center_time=labels['time'],
                               uri=uri,
                               band_uris=band_uris,
                               app_info=_get_app_metadata(config),
                               valid_data=polygon_from_sources_extents(
                                   sources, nbart.geobox))
        return dataset

    source = Datacube.group_datasets([task['dataset']], 'time')
    datasets = xr_apply(source, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    # file_path is already a Path, so use .suffix instead of os.path.splitext
    # (the base name returned by splitext was never used).
    if file_path.suffix == '.tif':
        dataset_to_geotif_yaml(
            dataset=fc_dataset,
            odc_dataset=datasets.item(),
            filename=file_path,
            variable_params=variable_params,
        )
    else:
        write_dataset_to_netcdf(
            dataset=fc_dataset,
            filename=file_path,
            global_attributes=global_attributes,
            variable_params=variable_params,
        )

    return datasets
def test_useful_error_on_write_empty_dataset(tmpnetcdf_filename):
    """Unwritable datasets must raise DatacubeException with a helpful message."""
    # A dataset with no variables at all -> error message mentions 'empty'
    empty_ds = xr.Dataset()
    with pytest.raises(DatacubeException) as excinfo:
        write_dataset_to_netcdf(empty_ds, tmpnetcdf_filename)
    assert 'empty' in str(excinfo.value)

    # A dataset with data but no CRS information -> error message mentions 'CRS'
    no_crs_ds = xr.Dataset(data_vars={'blue': (('time',), numpy.array([0, 1, 2]))})
    with pytest.raises(DatacubeException) as excinfo:
        write_dataset_to_netcdf(no_crs_ds, tmpnetcdf_filename)
    assert 'CRS' in str(excinfo.value)
def save(newWOFL, currWOFL, name, both=True):
    """Attach the new water layer to a dataset and write it out as NetCDF.

    With ``both=True`` the layer is added as ``CU_water`` alongside the
    existing variables of *currWOFL*; otherwise all existing variables are
    dropped and the layer is written alone as ``water``.
    """
    newWOFL.attrs['nodata'] = 1

    if both:
        target = currWOFL
        target['CU_water'] = newWOFL
    else:
        # Keep coords/attrs of currWOFL but drop all of its data variables
        target = currWOFL[[]]
        target['water'] = newWOFL

    out_path = f'/g/data/u46/users/bt2744/work/Projects/collection_upgrade/{name}'
    write_dataset_to_netcdf(target, out_path)
def _do_fc_task(config, task):
    """
    Load data, run FC algorithm, attach metadata, and write output.

    :param dict config: Config object
    :param dict task: Dictionary of tasks
    :return: Dataset objects representing the generated data that can be added to the index
    :rtype: list(datacube.model.Dataset)
    """
    global_attributes = config['global_attributes']
    variable_params = config['variable_params']
    file_path = Path(task['filename'])
    output_product = config['fc_product']

    # Refuse to clobber the output of a previous run.
    if file_path.exists():
        raise OSError(errno.EEXIST, 'Output file already exists', str(file_path))

    nbart_tile: Tile = task['nbart']
    nbart = GridWorkflow.load(nbart_tile,
                              ['green', 'red', 'nir', 'swir1', 'swir2'])

    measurements = config['fc_product'].measurements.values()
    fc_dataset = _make_fc_tile(nbart, measurements,
                               config.get('sensor_regression_coefficients'))

    def _make_dataset(labels, sources):
        # Build one indexable Dataset record per time slice.
        assert sources
        return make_dataset(product=output_product,
                            sources=sources,
                            extent=nbart.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=_get_app_metadata(config),
                            valid_data=polygon_from_sources_extents(
                                sources, nbart.geobox))

    datasets = xr_apply(nbart_tile.sources, _make_dataset, dtype='O')
    fc_dataset['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(
        dataset=fc_dataset,
        filename=file_path,
        global_attributes=global_attributes,
        variable_params=variable_params,
    )

    return datasets
def test_write_dataset_to_netcdf(tmpnetcdf_filename, odc_style_xr_dataset):
    """Data, global attributes and variable attributes survive a NetCDF round trip."""
    params = {'B10': {'attrs': {'abc': 'xyz'}}}
    write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename,
                            global_attributes={'foo': 'bar'},
                            variable_params=params)

    with netCDF4.Dataset(tmpnetcdf_filename) as ncfile:
        ncfile.set_auto_mask(False)

        assert 'B10' in ncfile.variables
        band = ncfile.variables['B10']
        assert (band[:] == odc_style_xr_dataset['B10'].values).all()

        assert 'foo' in ncfile.ncattrs()
        assert ncfile.getncattr('foo') == 'bar'

        assert 'abc' in band.ncattrs()
        assert band.getncattr('abc') == 'xyz'
def _do_wofs_task(config, task): """ Load data, run WOFS algorithm, attach metadata, and write output. :param dict config: Config object :param dict task: Dictionary of task values :return: Dataset objects representing the generated data that can be added to the index :rtype: list(datacube.model.Dataset) """ # datacube.api.Tile source_tile: NBAR Tile source_tile: Tile = task['source_tile'] # datacube.api.Tile pq_tile: Pixel quality Tile pq_tile: Tile = task['pq_tile'] # datacube.api.Tile dsm_tile: Digital Surface Model Tile dsm_tile: Tile = task['dsm_tile'] # Path file_path: output file destination file_path = Path( task['file_path']) # Path file_path: output file destination product = config['wofs_dataset_type'] if file_path.exists(): _LOG.warning('Output file already exists %r', str(file_path)) # load data bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'] # inputs needed from EO data) source = datacube.api.GridWorkflow.load(source_tile, measurements=bands) pq = datacube.api.GridWorkflow.load(pq_tile) dsm = datacube.api.GridWorkflow.load(dsm_tile, resampling='cubic') # Core computation result = wofls.woffles(*(x.isel(time=0) for x in [source, pq, dsm])).astype(np.int16) # Convert 2D DataArray to 3D DataSet result = xarray.concat([result], dim=source.time).to_dataset(name='water') # add metadata result.water.attrs[ 'nodata'] = 1 # lest it default to zero (i.e. clear dry) result.water.attrs['units'] = '1' # unitless (convention) result.water.attrs['crs'] = source.crs # Attach CRS. Note this is poorly represented in NetCDF-CF # (and unrecognised in xarray), likely improved by datacube-API model. 
result.attrs['crs'] = source.crs # Provenance tracking parent_sources = [ ds for tile in [source_tile, pq_tile, dsm_tile] for ds in tile.sources.values[0] ] # Create indexable record new_record = datacube.model.utils.make_dataset( product=product, sources=parent_sources, center_time=result.time.values[0], uri=file_path.absolute().as_uri(), extent=source_tile.geobox.extent, valid_data=task['valid_region'], app_info=_get_app_metadata(config)) def harvest(what, tile): """ Inherit optional metadata from EO, for future convenience only """ datasets = [ ds for source_datasets in tile.sources.values for ds in source_datasets ] values = [dataset.metadata_doc[what] for dataset in datasets] assert all(value == values[0] for value in values) return copy.deepcopy(values[0]) new_record.metadata_doc['platform'] = harvest('platform', source_tile) new_record.metadata_doc['instrument'] = harvest('instrument', source_tile) # copy metadata record into xarray result['dataset'] = _docvariable(new_record, result.time) global_attributes = config['global_attributes'].copy() global_attributes.update(task['extra_global_attributes']) # write output write_dataset_to_netcdf(result, file_path, global_attributes=global_attributes, variable_params=config['variable_params']) return [new_record]