def test_chunksizes(tmpnetcdf_filename):
    """Chunk sizes requested at variable creation are applied, clamped to dimension length."""
    nco = create_netcdf(tmpnetcdf_filename)

    create_coordinate(nco, 'greg', numpy.array([1.0, 2.0, 3.0]), 'cubic gregs')
    create_coordinate(nco, 'bleh', numpy.array([1.0, 2.0, 3.0, 4.0, 5.0]), 'metric blehs')

    create_variable(nco, 'no_chunks',
                    Variable(numpy.dtype('int16'), None, ('greg', 'bleh'), None))
    create_variable(nco, 'min_max_chunks',
                    Variable(numpy.dtype('int16'), None, ('greg', 'bleh'), None),
                    chunksizes=[2, 50])
    nco.close()

    # Re-open read-only and inspect the chunking netCDF actually stored.
    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert nco['no_chunks'].chunking() == 'contiguous'
        # The requested 50 exceeds the 'bleh' dimension (length 5) and is clamped.
        assert nco['min_max_chunks'].chunking() == [2, 5]
def test_chunksizes(tmpnetcdf_filename):
    """Verify chunk-size handling when writing variables to a fresh NetCDF file.

    NOTE(review): an identically named test is defined earlier in this module;
    this later definition shadows it — consider renaming or removing one.
    """
    nco = create_netcdf(tmpnetcdf_filename)

    for name, values, units in (
            ('greg', numpy.array([1.0, 2.0, 3.0]), 'cubic gregs'),
            ('bleh', numpy.array([1.0, 2.0, 3.0, 4.0, 5.0]), 'metric blehs')):
        create_coordinate(nco, name, values, units)

    dims = ('greg', 'bleh')
    create_variable(nco, 'no_chunks', Variable(numpy.dtype('int16'), None, dims, None))
    create_variable(nco, 'min_max_chunks', Variable(numpy.dtype('int16'), None, dims, None),
                    chunksizes=[2, 50])
    nco.close()

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert nco['no_chunks'].chunking() == 'contiguous'
        assert nco['min_max_chunks'].chunking() == [2, 5]
def make_fake_netcdf_dataset(nc_name, yaml_doc):
    """Write a minimal NetCDF file holding yaml_doc's text as a one-element 'dataset' variable."""
    from datacube.model import Variable
    from datacube.storage.netcdf_writer import create_variable, netcdfy_data
    from netCDF4 import Dataset
    import numpy as np

    doc_text = yaml_doc.read_text()
    # dtype=bytes gives a fixed-width 'S' array sized to the content.
    payload = np.array([doc_text], dtype=bytes)
    dataset_var = Variable(payload.dtype, None, ('time',), None)

    with Dataset(nc_name, 'w') as nco:
        nco.createDimension('time', size=1)
        create_variable(nco, 'dataset', dataset_var)
        nco['dataset'][:] = netcdfy_data(payload)
def _create_storage_unit(self, stat: OutputProduct, output_filename: Path):
    """Create the NetCDF storage unit for `stat` and write the serialized source
    datasets into its 'dataset' variable.

    Returns the still-open netCDF4 dataset object; the caller is responsible
    for writing measurement data into it and closing it.
    """
    all_measurement_defns = list(stat.product.measurements.values())

    # Lineage/metadata documents for this output, referenced by the file's own URI.
    datasets = self._find_source_datasets(stat, uri=output_filename.as_uri())

    variable_params = self._create_netcdf_var_params(stat)
    nco = self._nco_from_sources(datasets,
                                 self._geobox,
                                 all_measurement_defns,
                                 variable_params,
                                 output_filename)

    # presumably `datasets` is an xarray.DataArray of serialized documents
    # (it exposes `.values`) — TODO confirm against _find_source_datasets
    netcdf_writer.create_variable(nco, 'dataset', datasets, zlib=True)
    nco['dataset'][:] = netcdf_writer.netcdfy_data(datasets.values)
    return nco
def create_netcdf_storage_unit(filename, crs, coordinates, variables, variable_params,
                               global_attributes=None, netcdfparams=None):
    """
    Create a NetCDF file on disk.

    :param pathlib.Path filename: filename to write to
    :param datacube.utils.geometry.CRS crs: Datacube CRS object defining the spatial projection
    :param dict coordinates: Dict of named `datacube.model.Coordinate`s to create
    :param dict variables: Dict of named `datacube.model.Variable`s to create
    :param dict variable_params: Dict of dicts, with keys matching variable names, of extra parameters
            for variables
    :param dict global_attributes: named global attributes to add to output file
    :param dict netcdfparams: Extra parameters to use when creating netcdf file
    :return: open netCDF4.Dataset object, ready for writing to
    """
    filename = Path(filename)
    if filename.exists():
        raise RuntimeError('Storage Unit already exists: %s' % filename)

    try:
        filename.parent.mkdir(parents=True)
    except OSError:
        # Parent directory probably exists already; real failures surface below.
        pass

    _LOG.info('Creating storage unit: %s', filename)

    nco = netcdf_writer.create_netcdf(str(filename), **(netcdfparams or {}))

    for coord_name, coordinate in coordinates.items():
        netcdf_writer.create_coordinate(nco, coord_name, coordinate.values, coordinate.units)

    netcdf_writer.create_grid_mapping_variable(nco, crs)

    for var_name, variable in variables.items():
        # Only attach the CRS to variables that span all of its dimensions.
        spans_crs_dims = all(dim in variable.dims for dim in crs.dimensions)
        params = variable_params.get(var_name, {})
        out_var = netcdf_writer.create_variable(nco, var_name, variable,
                                                set_crs=spans_crs_dims, **params)
        for attr_name, attr_value in params.get('attrs', {}).items():
            setattr(out_var, attr_name, attr_value)

    for attr_name, attr_value in (global_attributes or {}).items():
        setattr(nco, attr_name, attr_value)

    return nco
def test_create_string_variable(tmpnetcdf_filename):
    """A fixed-width string variable round-trips through the NetCDF writer."""
    nco = create_netcdf(tmpnetcdf_filename)
    create_coordinate(nco, 'greg', numpy.array([1.0, 3.0, 9.0]), 'cubic gregs')

    str_dtype = numpy.dtype('S100')
    payload = numpy.array(["test-str1", "test-str2", "test-str3"], dtype=str_dtype)

    string_var = create_variable(nco, 'str_var', Variable(str_dtype, None, ('greg',), None))
    string_var[:] = netcdfy_data(payload)
    nco.close()

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert 'str_var' in nco.variables
        assert netCDF4.chartostring(nco['str_var'][0]) == payload[0]
def test_create_string_variable(tmpnetcdf_filename):
    """String data written to a variable is readable back via chartostring.

    NOTE(review): an identically named test is defined earlier in this module;
    this later definition shadows it — consider renaming or removing one.
    """
    expected = numpy.array(["test-str1", "test-str2", "test-str3"], dtype=numpy.dtype('S100'))

    nco = create_netcdf(tmpnetcdf_filename)
    create_coordinate(nco, 'greg', numpy.array([1.0, 3.0, 9.0]), 'cubic gregs')

    str_variable = create_variable(nco, 'str_var',
                                   Variable(expected.dtype, None, ('greg',), None))
    str_variable[:] = netcdfy_data(expected)
    nco.close()

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert 'str_var' in nco.variables
        assert netCDF4.chartostring(nco['str_var'][0]) == expected[0]
def saveNC(output, filename, history):
    """Write the bands of `output` (an xarray-like dataset) to a new NetCDF file,
    recording `history` as a file attribute."""
    nco = netcdf_writer.create_netcdf(filename)
    # NOTE(review): `history` is decoded then re-encoded, so the attribute is
    # set to a *bytes* object — confirm callers pass bytes and that a
    # bytes-valued attribute is intended here.
    nco.history = (history.decode('utf-8').encode('ascii', 'replace'))

    coords = output.coords
    cnames = ()
    # Create one NetCDF coordinate per input coordinate, remembering the order
    # so variables can be declared over the same dimension tuple.
    for x in coords:
        netcdf_writer.create_coordinate(nco, x, coords[x].values, coords[x].units)
        cnames = cnames + (x,)

    netcdf_writer.create_grid_mapping_variable(nco, output.crs)

    for band in output.data_vars:
        # Replaces NaNs in place — this mutates the caller's `output` object.
        # NOTE(review): `nodata` is not defined in this function, so it must be
        # a module-level global — verify it exists in the enclosing module.
        output.data_vars[band].values[np.isnan(output.data_vars[band].values)] = nodata
        var = netcdf_writer.create_variable(nco, band,
                                            Variable(output.data_vars[band].dtype,
                                                     nodata, cnames, None),
                                            set_crs=True)
        var[:] = netcdf_writer.netcdfy_data(output.data_vars[band].values)

    nco.close()
def test_create_string_variable(tmpdir, s1, s2, s3):
    """Strings written with netcdfy_data are read back intact by read_strings_from_netcdf."""
    tmpnetcdf_filename = get_tmpnetcdf_filename(tmpdir)
    str_var = 'str_var'
    originals = (s1, s2, s3)

    nco = create_netcdf(tmpnetcdf_filename)
    create_coordinate(nco, 'greg', numpy.array([1.0, 3.0, 9.0]), 'cubic gregs')

    payload = numpy.array(originals, dtype=numpy.dtype('S100'))
    var = create_variable(nco, str_var, Variable(payload.dtype, None, ('greg',), None))
    var[:] = netcdfy_data(payload)
    nco.close()

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert str_var in nco.variables

    # Round-trip check via the project's reader helper.
    for returned, original in zip(read_strings_from_netcdf(tmpnetcdf_filename, variable=str_var),
                                  originals):
        assert returned == original
def write_dataset_to_netcdf(access_unit, global_attributes, variable_params, filename):
    """Write an xarray-like dataset to a new NetCDF file.

    :param access_unit: dataset exposing .coords, .crs and .data_vars to serialize
    :param dict global_attributes: attributes to set on the file itself
    :param dict variable_params: per-variable creation kwargs, plus optional 'attrs'
    :param pathlib.Path filename: destination path; must not already exist
    :raises RuntimeError: if `filename` already exists
    """
    if filename.exists():
        raise RuntimeError('Storage Unit already exists: %s' % filename)

    try:
        filename.parent.mkdir(parents=True)
    except OSError:
        # Parent directory probably exists already; any other failure will
        # surface when the file itself is created below.
        pass

    # _LOG.info("Writing storage unit: %s", filename)
    nco = netcdf_writer.create_netcdf(str(filename))
    # BUG FIX: previously the file handle leaked if any of the writes below
    # raised; always close the dataset, even on error.
    try:
        for name, coord in access_unit.coords.items():
            netcdf_writer.create_coordinate(nco, name, coord.values, coord.units)

        netcdf_writer.create_grid_mapping_variable(nco, access_unit.crs)

        for name, variable in access_unit.data_vars.items():
            # Create variable
            var_params = variable_params.get(name, {})
            data_var = netcdf_writer.create_variable(
                nco, name,
                Variable(variable.dtype,
                         getattr(variable, 'nodata', None),
                         variable.dims,
                         getattr(variable, 'units', '1')),
                **var_params)

            # Write data
            data_var[:] = netcdf_writer.netcdfy_data(variable.values)

            # TODO: 'flags_definition', 'spectral_definition'?
            for key, value in var_params.get('attrs', {}).items():
                setattr(data_var, key, value)

        # write global attributes
        for key, value in global_attributes.items():
            setattr(nco, key, value)
    finally:
        nco.close()
def create_netcdf_storage_unit(filename, crs, coordinates, variables, variable_params,
                               global_attributes=None, netcdfparams=None):
    """Create a new NetCDF file and populate its coordinates, grid mapping and variables.

    :param filename: pathlib.Path to create; must not already exist
    :param crs: spatial reference used for the grid-mapping variable
    :param dict coordinates: name -> coordinate (exposing .values and .units)
    :param dict variables: name -> variable definition (exposing .dims)
    :param dict variable_params: per-variable creation kwargs, plus optional 'attrs'
    :param dict global_attributes: attributes set on the file itself
    :param dict netcdfparams: extra kwargs forwarded to file creation
    :return: the open netCDF4 dataset — the caller must write data into it and close it
    :raises RuntimeError: if `filename` already exists
    """
    if filename.exists():
        raise RuntimeError('Storage Unit already exists: %s' % filename)

    try:
        filename.parent.mkdir(parents=True)
    except OSError:
        # Parent directory probably exists already; any other failure will
        # surface when the file itself is created below.
        pass

    nco = netcdf_writer.create_netcdf(str(filename), **(netcdfparams or {}))

    for name, coord in coordinates.items():
        netcdf_writer.create_coordinate(nco, name, coord.values, coord.units)

    netcdf_writer.create_grid_mapping_variable(nco, crs)

    for name, variable in variables.items():
        # Only attach the CRS to variables that span all of its dimensions.
        set_crs = all(dim in variable.dims for dim in crs.dimensions)
        var_params = variable_params.get(name, {})
        data_var = netcdf_writer.create_variable(nco, name, variable,
                                                 set_crs=set_crs, **var_params)

        for key, value in var_params.get('attrs', {}).items():
            setattr(data_var, key, value)

    for key, value in (global_attributes or {}).items():
        setattr(nco, key, value)

    return nco
def test_chunksizes(tmpnetcdf_filename):
    """Chunking requested at creation is honoured, and clamped to the dimension size."""
    nco = create_netcdf(tmpnetcdf_filename)

    xs = numpy.arange(3, dtype='float32')
    ys = numpy.arange(5, dtype='float32')
    assert create_coordinate(nco, 'x', xs, 'm') is not None
    assert create_coordinate(nco, 'y', ys, 'm') is not None

    assert create_variable(
        nco, 'no_chunks',
        Variable(numpy.dtype('int16'), None, ('x', 'y'), None)) is not None
    assert create_variable(
        nco, 'min_max_chunks',
        Variable(numpy.dtype('int16'), None, ('x', 'y'), None),
        chunksizes=(2, 50)) is not None

    # String variables gain an implicit character dimension (length 3 here).
    labels = xr.DataArray(numpy.array(["AAa", 'bbb', 'CcC'], dtype='S'),
                          dims=['x'], coords={'x': xs})
    create_variable(nco, 'strings_unchunked', labels)
    create_variable(nco, 'strings_chunked', labels, chunksizes=(1, ))
    nco.close()

    with netCDF4.Dataset(tmpnetcdf_filename) as nco:
        assert nco['no_chunks'].chunking() == 'contiguous'
        # The requested 50 exceeds the 'y' dimension (length 5) and is clamped.
        assert nco['min_max_chunks'].chunking() == [2, 5]
        assert nco['strings_unchunked'].chunking() == 'contiguous'
        assert nco['strings_chunked'].chunking() == [1, 3]
def compute_and_write(self):
    """Compute the WOfS confidence and confidence-filtered summary bands and
    write them to the corresponding NetCDF file.

    The output filename, band names, global attributes and CRS are all taken
    from ``self.cfg``.
    """
    geo_box = self.grid_spec.tile_geobox(self.tile_index)

    # Compute metadata: build a datacube Dataset document for the output file
    # and serialize it to YAML so it can be embedded in the NetCDF file below.
    env = self.cfg.get_env_of_product('wofs_filtered_summary')
    with Datacube(app='wofs-confidence', env=env) as dc:
        product = dc.index.products.get_by_name('wofs_filtered_summary')
    extent = self.grid_spec.tile_geobox(self.tile_index).extent
    # NOTE(review): naive local time — confirm UTC is not required here.
    center_time = datetime.now()
    uri = self.get_filtered_uri()
    dts = make_dataset(product=product, sources=self.factor_sources,
                       extent=extent, center_time=center_time, uri=uri)
    metadata = yaml.dump(dts.metadata_doc, Dumper=SafeDumper, encoding='utf-8')

    # Compute dataset coords: a single time slice plus the tile's spatial dims.
    coords = dict()
    coords['time'] = Coordinate(
        netcdf_writer.netcdfy_coord(
            np.array([datetime_to_seconds_since_1970(center_time)])),
        ['seconds since 1970-01-01 00:00:00'])
    for dim in geo_box.dimensions:
        coords[dim] = Coordinate(
            netcdf_writer.netcdfy_coord(geo_box.coordinates[dim].values),
            geo_box.coordinates[dim].units)

    # Compute dataset variables: both output bands share the same definition.
    spatial_var = Variable(dtype=np.dtype(DEFAULT_TYPE),
                           nodata=DEFAULT_FLOAT_NODATA,
                           dims=('time', ) + geo_box.dimensions,
                           units=('seconds since 1970-01-01 00:00:00', ) + geo_box.crs.units)
    band1 = self.cfg.cfg['wofs_filtered_summary']['confidence']
    band2 = self.cfg.cfg['wofs_filtered_summary']['confidence_filtered']
    # NOTE(review): `vars` shadows the builtin of the same name.
    vars = {band1: spatial_var, band2: spatial_var}
    vars_params = {band1: {}, band2: {}}
    global_atts = self.cfg.cfg['global_attributes']

    # Get crs string, falling back to the module default when unset.
    crs = self.cfg.cfg['storage']['crs'] if self.cfg.cfg['storage'].get(
        'crs') else DEFAULT_CRS

    # Create a dataset container (left open; closed explicitly below).
    filename = self.get_filtered_uri()
    logger.info('creating', file=filename.name)
    netcdf_unit = create_netcdf_storage_unit(filename=filename,
                                             crs=CRS(crs),
                                             coordinates=coords,
                                             variables=vars,
                                             global_attributes=global_atts,
                                             variable_params=vars_params)

    # Confidence layer: Fill variable data and set attributes
    confidence = self.compute_confidence()
    netcdf_unit[band1][:] = netcdf_writer.netcdfy_data(confidence)
    netcdf_unit[band1].units = '1'
    netcdf_unit[band1].valid_range = [0, 1.0]
    netcdf_unit[band1].coverage_content_type = 'modelResult'
    netcdf_unit[band1].long_name = \
        'Wofs Confidence Layer predicted by {}'.format(self.confidence_model.factors.__str__())

    # Confidence filtered wofs-stats frequency layer: Fill variable data and set attributes
    confidence_filtered = self.compute_confidence_filtered()
    netcdf_unit[band2][:] = netcdf_writer.netcdfy_data(confidence_filtered)
    netcdf_unit[band2].units = '1'
    netcdf_unit[band2].valid_range = [0, 1.0]
    netcdf_unit[band2].coverage_content_type = 'modelResult'
    netcdf_unit[
        band2].long_name = 'WOfS-Stats frequency confidence filtered layer'

    # Metadata: store the serialized dataset document alongside the bands.
    dataset_data = DataArray(data=[metadata], dims=('time', ))
    netcdf_writer.create_variable(netcdf_unit, 'dataset', dataset_data, zlib=True)
    netcdf_unit['dataset'][:] = netcdf_writer.netcdfy_data(
        dataset_data.values)

    netcdf_unit.close()
    logger.info('completed', file=filename.name)