def test_attach_attributes(self):
    """
    Test the attach_attributes function.
    """
    attrs = {"alpha": 1, "beta": 2}

    fname = "test_attach_attributes.h5"
    with h5py.File(fname, "w", **self.memory_kwargs) as fid:
        dset = fid.create_dataset("data", data=self.image_data)
        hdf5.attach_attributes(dset, attrs)
        test = {k: v for k, v in dset.attrs.items()}
        self.assertDictEqual(test, attrs)

def aggregate_ancillary(granule_groups):
    """
    If the acquisition is part of a `tiled` scene such as Sentinel-2A,
    then we need to average the point measurements gathered from all
    granules.
    """
    # initialise the mean result
    ozone = vapour = aerosol = elevation = 0.0

    # number of granules in the scene
    n_tiles = len(granule_groups)

    for granule in granule_groups:
        group = granule[GroupName.ANCILLARY_GROUP.value]

        ozone += group[DatasetName.OZONE.value][()]
        vapour += group[DatasetName.WATER_VAPOUR.value][()]
        aerosol += group[DatasetName.AEROSOL.value][()]
        elevation += group[DatasetName.ELEVATION.value][()]

    # average
    ozone /= n_tiles
    vapour /= n_tiles
    aerosol /= n_tiles
    elevation /= n_tiles

    description = ("The {} value is an average from all the {} values "
                   "retrieved for each Granule.")
    attrs = {"data_source": "granule_average"}

    # output each average value back into the same granule ancillary group
    group_name = ppjoin(GroupName.ANCILLARY_GROUP.value,
                        GroupName.ANCILLARY_AVG_GROUP.value)

    for granule in granule_groups:
        # for the multifile workflow, we only want to write to one granule
        try:
            group = granule.create_group(group_name)
        except ValueError:
            continue

        dset = group.create_dataset(DatasetName.OZONE.value, data=ozone)
        attrs["description"] = description.format(*(2 * ["Ozone"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.WATER_VAPOUR.value, data=vapour)
        attrs["description"] = description.format(*(2 * ["Water Vapour"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.AEROSOL.value, data=aerosol)
        attrs["description"] = description.format(*(2 * ["Aerosol"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.ELEVATION.value, data=elevation)
        attrs["description"] = description.format(*(2 * ["Elevation"]))
        attach_attributes(dset, attrs)

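# Illustrative usage sketch only (not part of the original module). The file
# name and internal granule layout are hypothetical; the function expects the
# already-opened granule groups of a tiled (e.g. Sentinel-2) ancillary file.
#
#     with h5py.File("S2A-scene-ancillary.h5", "a") as fid:
#         granule_groups = [fid[name] for name in fid]
#         aggregate_ancillary(granule_groups)
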
def calculate_angles(
    acquisition,
    lon_lat_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
    tle_path=None,
    trackpoints=12,
):
    """
    Calculate the satellite view, satellite azimuth, solar zenith,
    solar azimuth, and relative azimuth angle grids, as well as the
    time grid. All grids are output as float32 HDF5 datasets.
    A wrapper routine for the ``angle_all`` Fortran module built via
    ``F2Py``.

    :param acquisition:
        An instance of an `Acquisition` object.

    :param lon_lat_group:
        The root HDF5 `Group` that contains the longitude and
        latitude datasets.
        The dataset pathnames are given by:

        * DatasetName.LON
        * DatasetName.LAT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset names will be as follows:

        * DatasetName.SATELLITE_VIEW
        * DatasetName.SATELLITE_AZIMUTH
        * DatasetName.SOLAR_ZENITH
        * DatasetName.SOLAR_AZIMUTH
        * DatasetName.RELATIVE_AZIMUTH
        * DatasetName.TIME
        * DatasetName.CENTRELINE
        * DatasetName.BOXLINE
        * DatasetName.SPHEROID
        * DatasetName.ORBITAL_ELEMENTS
        * DatasetName.SATELLITE_MODEL
        * DatasetName.SATELLITE_TRACK

    :param trackpoints:
        Number of trackpoints to use when calculating solar angles.
        Default is 12.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :param tle_path:
        A `str` pointing to the directory containing the Two Line
        Element data.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
""" century = calculate_julian_century(acquisition.acquisition_datetime) geobox = acquisition.gridded_geo_box() # longitude and latitude datasets longitude = lon_lat_group[DatasetName.LON.value] latitude = lon_lat_group[DatasetName.LAT.value] # Determine approximate pixel size lat_data = latitude[0:2, 0] psy = abs(lat_data[1] - lat_data[0]) lon_data = longitude[0, 0:2] psx = abs(lon_data[1] - lon_data[0]) # Min and Max lat extents # This method should handle northern and southern hemispheres # TODO: Put in a conditional over the 1 degree buffer min_lat = (min( min(geobox.ul_lonlat[1], geobox.ur_lonlat[1]), min(geobox.ll_lonlat[1], geobox.lr_lonlat[1]), ) - 1) max_lat = (max( max(geobox.ul_lonlat[1], geobox.ur_lonlat[1]), max(geobox.ll_lonlat[1], geobox.lr_lonlat[1]), ) + 1) # Get the lat/lon of the scene centre # check if we have a file with GPS satellite track points # which can be used for cases of image granules/tiles, eg Sentinel-2A if acquisition.gps_file: points = acquisition.read_gps_file() subs = points[(points.latitude >= min_lat) & (points.latitude <= max_lat)] idx = subs.shape[0] // 2 - 1 centre_xy = (subs.iloc[idx].longitude, subs.iloc[idx].latitude) else: centre_xy = geobox.centre_lonlat # Get the earth spheroidal paramaters spheroid = setup_spheroid(geobox.crs.ExportToWkt()) # Get the satellite orbital elements orbital_elements = setup_orbital_elements(acquisition, tle_path) # Get the satellite model paramaters smodel = setup_smodel(centre_xy[0], centre_xy[1], spheroid[0], orbital_elements[0], psx, psy) # Get the times and satellite track information track = setup_times( min_lat, max_lat, spheroid[0], orbital_elements[0], smodel[0], psx, psy, trackpoints, ) # Initialise the output files if out_group is None: fid = h5py.File("satellite-solar-angles.h5", "w", driver="core", backing_store=False) else: fid = out_group if GroupName.SAT_SOL_GROUP.value not in fid: fid.create_group(GroupName.SAT_SOL_GROUP.value) if filter_opts is None: filter_opts = {} else: filter_opts = filter_opts.copy() filter_opts["chunks"] = acquisition.tile_size grp = fid[GroupName.SAT_SOL_GROUP.value] # store the parameter settings used with the satellite and solar angles # function params = { "dimensions": (acquisition.lines, acquisition.samples), "lines": acquisition.lines, "samples": acquisition.samples, "century": century, "decimal_hour": acquisition.decimal_hour(), "acquisition_datetime": acquisition.acquisition_datetime, "centre_longitude_latitude": centre_xy, "minimum_latiude": min_lat, "maximum_latiude": max_lat, "latitude_buffer": 1.0, "max_view_angle": acquisition.maximum_view_angle, } _store_parameter_settings(grp, spheroid[1], orbital_elements[1], smodel[1], track[1], params) out_dtype = "float32" no_data = np.nan kwargs = compression.config(**filter_opts).dataset_compression_kwargs() kwargs["shape"] = (acquisition.lines, acquisition.samples) kwargs["fillvalue"] = no_data kwargs["dtype"] = out_dtype sat_v_ds = grp.create_dataset(DatasetName.SATELLITE_VIEW.value, **kwargs) sat_az_ds = grp.create_dataset(DatasetName.SATELLITE_AZIMUTH.value, **kwargs) sol_z_ds = grp.create_dataset(DatasetName.SOLAR_ZENITH.value, **kwargs) sol_az_ds = grp.create_dataset(DatasetName.SOLAR_AZIMUTH.value, **kwargs) rel_az_ds = grp.create_dataset(DatasetName.RELATIVE_AZIMUTH.value, **kwargs) time_ds = grp.create_dataset(DatasetName.TIME.value, **kwargs) # base attributes for image datasets attrs = { "crs_wkt": geobox.crs.ExportToWkt(), "geotransform": geobox.transform.to_gdal(), "no_data_value": no_data, } 
    attach_image_attributes(sat_v_ds, attrs)
    attach_image_attributes(sat_az_ds, attrs)
    attach_image_attributes(sol_z_ds, attrs)
    attach_image_attributes(sol_az_ds, attrs)
    attach_image_attributes(rel_az_ds, attrs)
    attach_image_attributes(time_ds, attrs)

    attrs = {
        "description": "Contains the satellite viewing angle in degrees.",
        "units": "degrees",
        "alias": "satellite-view",
    }
    attach_attributes(sat_v_ds, attrs)

    attrs = {
        "description": "Contains the satellite azimuth angle in degrees.",
        "units": "degrees",
        "alias": "satellite-azimuth",
    }
    attach_attributes(sat_az_ds, attrs)

    attrs = {
        "description": "Contains the solar zenith angle in degrees.",
        "units": "degrees",
        "alias": "solar-zenith",
    }
    attach_attributes(sol_z_ds, attrs)

    attrs = {
        "description": "Contains the solar azimuth angle in degrees.",
        "units": "degrees",
        "alias": "solar-azimuth",
    }
    attach_attributes(sol_az_ds, attrs)

    attrs = {
        "description": "Contains the relative azimuth angle in degrees.",
        "units": "degrees",
        "alias": "relative-azimuth",
    }
    attach_attributes(rel_az_ds, attrs)

    attrs = {
        "description": "Contains the time from apogee in seconds.",
        "units": "seconds",
        "alias": "timedelta",
    }
    attach_attributes(time_ds, attrs)

    # Initialise centre line variables
    x_cent = np.zeros((acquisition.lines), dtype=out_dtype)
    n_cent = np.zeros((acquisition.lines), dtype=out_dtype)

    for tile in acquisition.tiles():
        idx = (slice(tile[0][0], tile[0][1]), slice(tile[1][0], tile[1][1]))

        # read the lon and lat tile
        lon_data = longitude[idx]
        lat_data = latitude[idx]

        # may not be processing full row wise (all columns)
        dims = lon_data.shape
        col_offset = idx[1].start

        view = np.full(dims, no_data, dtype=out_dtype)
        azi = np.full(dims, no_data, dtype=out_dtype)
        asol = np.full(dims, no_data, dtype=out_dtype)
        soazi = np.full(dims, no_data, dtype=out_dtype)
        rela_angle = np.full(dims, no_data, dtype=out_dtype)
        time = np.full(dims, no_data, dtype=out_dtype)

        # loop each row within each tile (which itself could be a single row)
        for i in range(lon_data.shape[0]):
            row_id = idx[0].start + i + 1  # FORTRAN 1 based index

            stat = angle(
                dims[1],
                acquisition.lines,
                row_id,
                col_offset,
                lat_data[i],
                lon_data[i],
                spheroid[0],
                orbital_elements[0],
                acquisition.decimal_hour(),
                century,
                trackpoints,
                smodel[0],
                track[0],
                view[i],
                azi[i],
                asol[i],
                soazi[i],
                rela_angle[i],
                time[i],
                x_cent,
                n_cent,
            )
            # x_cent[idx[0]], n_cent[idx[0]])

            if stat != 0:
                msg = ("Error in calculating angles at row: {}.\n"
                       "No interval found in track!")
                raise RuntimeError(msg.format(row_id - 1))

        # output to disk
        sat_v_ds[idx] = view
        sat_az_ds[idx] = azi
        sol_z_ds[idx] = asol
        sol_az_ds[idx] = soazi
        rel_az_ds[idx] = rela_angle
        time_ds[idx] = time

    # outputs
    # TODO: rework create_boxline so that it reads tiled data effectively
    create_centreline_dataset(geobox, x_cent, n_cent, grp)
    create_boxline(
        acquisition,
        sat_v_ds[:],
        grp[DatasetName.CENTRELINE.value],
        grp,
        acquisition.maximum_view_angle,
    )

    if out_group is None:
        return fid

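# Illustrative usage sketch only (not part of the original module). The
# acquisition loader, file names, and the longitude/latitude group path are
# hypothetical stand-ins for whatever the upstream workflow produced.
#
#     acq = acquisitions("scene-package").get_acquisitions()[0]
#     with h5py.File("longitude-latitude.h5", "r") as lon_lat_fid, \
#             h5py.File("satellite-solar.h5", "w") as out_fid:
#         calculate_angles(acq, lon_lat_fid["LON-LAT-GROUP"],
#                          out_group=out_fid, trackpoints=12)
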
def convert_file(fname, out_fname, compression, filter_opts):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            name_fmt = 'BAND-{}'

            # global attributes
            # TODO update the history attrs
            # TODO remove the NC_GLOBAL str and just have plain attr names
            g_attrs = ds.tags()

            # get timestamp info
            origin = g_attrs.pop('time#units').replace('hours since ', '')
            hours = json.loads(
                g_attrs.pop('NETCDF_DIM_time_VALUES').replace('{', '[').replace('}', ']')
            )
            df = pandas.DataFrame(
                {
                    'timestamp': pandas.to_datetime(hours, unit='h', origin=origin),
                    'band_name': [name_fmt.format(i + 1) for i in range(ds.count)]
                }
            )
            df['dataset_name'] = df.timestamp.dt.strftime('%Y/%B-%d/%H%M')
            df['dataset_name'] = df['dataset_name'].str.upper()

            # create a timestamp and band name index table dataset
            desc = "Timestamp and Band Name index information."
            attrs = {
                'description': desc
            }
            write_dataframe(df, 'INDEX', fid, compression, attrs=attrs)

            attach_attributes(fid, g_attrs)

            # process every band
            for i in range(1, ds.count + 1):
                ds_name = df.iloc[i - 1].dataset_name

                # create empty or copy the user supplied filter options
                if not filter_opts:
                    f_opts = dict()
                else:
                    f_opts = filter_opts.copy()

                # band attributes
                # TODO remove NETCDF tags
                # TODO add fillvalue attr
                attrs = ds.tags(i)
                attrs['timestamp'] = df.iloc[i - 1]['timestamp']
                attrs['band_name'] = df.iloc[i - 1]['band_name']
                attrs['geotransform'] = ds.transform.to_gdal()
                attrs['crs_wkt'] = CRS.ExportToWkt()

                # use ds native chunks if none are provided
                if 'chunks' not in f_opts:
                    try:
                        # block_shapes is a zero-indexed list (one entry per band)
                        f_opts['chunks'] = ds.block_shapes[i - 1]
                    except IndexError:
                        print("Chunk error: {}".format(fname))
                        f_opts['chunks'] = (73, 144)

                # write to disk as an IMAGE Class Dataset
                write_h5_image(ds.read(i), ds_name, fid, attrs=attrs,
                               compression=compression, filter_opts=f_opts)

def convert_file(fname, out_h5: h5py.Group, compression, filter_opts: Optional[Dict] = None):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_h5:
        A h5py.Group to write the output datasets to.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to the given `out_h5` group.
    """
    with rasterio.open(fname) as ds:
        name_fmt = "BAND-{}"

        # global attributes
        # TODO update the history attrs
        # TODO remove the NC_GLOBAL str and just have plain attr names
        g_attrs = ds.tags()

        # get timestamp info
        origin = g_attrs.pop("time#units").replace("hours since ", "")
        hours = json.loads(
            g_attrs.pop("NETCDF_DIM_time_VALUES").replace("{", "[").replace("}", "]")
        )
        df = pandas.DataFrame({
            "timestamp": pandas.to_datetime(hours, unit="h", origin=origin),
            "band_name": [name_fmt.format(i + 1) for i in range(ds.count)],
        })
        df["dataset_name"] = df.timestamp.dt.strftime("%Y/%B-%d/%H%M")
        df["dataset_name"] = df["dataset_name"].str.upper()

        # create a timestamp and band name index table dataset
        desc = "Timestamp and Band Name index information."
        attrs = {"description": desc}
        write_dataframe(df, "INDEX", out_h5, compression, attrs=attrs)

        attach_attributes(out_h5, g_attrs)

        # process every band
        for i in range(1, ds.count + 1):
            ds_name = df.iloc[i - 1].dataset_name

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # band attributes
            # TODO remove NETCDF tags
            # TODO add fillvalue attr
            attrs = ds.tags(i)
            attrs["timestamp"] = df.iloc[i - 1]["timestamp"].replace(tzinfo=timezone.utc)
            attrs["band_name"] = df.iloc[i - 1]["band_name"]
            attrs["geotransform"] = ds.transform.to_gdal()
            attrs["crs_wkt"] = CRS.ExportToWkt()

            # use ds native chunks if none are provided
            if "chunks" not in f_opts:
                try:
                    # block_shapes is a zero-indexed list (one entry per band)
                    f_opts["chunks"] = ds.block_shapes[i - 1]
                except IndexError:
                    print("Chunk error: {}".format(fname))
                    f_opts["chunks"] = (73, 144)

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                ds.read(i),
                ds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )

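# Illustrative usage sketch only (not part of the original module). The file
# names are hypothetical (they follow the usual NCEP/NCAR pr_wtr naming
# pattern); the output group here is simply the root of a new HDF5 file.
#
#     with h5py.File("pr_wtr.eatm.2018.h5", "w") as out_fid:
#         convert_file("pr_wtr.eatm.2018.nc", out_fid,
#                      H5CompressionFilter.LZF, None)
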
def convert_tile(fname, out_h5: h5py.Group, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_h5:
        A h5py.Group to write the output data to.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    # read the geo-spatial information beforehand
    # relying on gdal to parse it
    geospatial = {}
    with rasterio.open(fname) as ds:
        for sds_name in ds.subdatasets:
            with rasterio.open(sds_name) as sds:
                band_name = sds_name.split(":")[-1]
                geospatial[band_name] = {
                    "geotransform": sds.transform.to_gdal(),
                    "crs_wkt": sds.crs.wkt,
                }

    # convert data
    with netCDF4.Dataset(fname) as ds:
        ds.set_auto_scale(False)

        # global attributes
        global_attrs = {key: ds.getncattr(key) for key in ds.ncattrs()}
        attach_attributes(out_h5, global_attrs)

        # find and convert every subdataset (sds)
        for sds_name in sorted(ds.variables, key=_brdf_netcdf_band_orderer):
            sds = ds.variables[sds_name]

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # Recreate datasets as 2-dimensional dataset
            dim1, dim2, *_ = sds.shape
            if "chunks" not in f_opts:
                assert dim1 == 2400 and dim2 == 2400
                f_opts["chunks"] = (240, 240)
            else:
                # truncate any user supplied chunks to 2 dimensions
                f_opts["chunks"] = (f_opts["chunks"][0], f_opts["chunks"][1])

            # subdataset attributes and spatial attributes
            attrs = {key: sds.getncattr(key) for key in sds.ncattrs()}
            # attrs['geotransform'] = sds.transform.to_gdal()
            # attrs['crs_wkt'] = sds.crs.wkt
            attrs.update(geospatial[sds_name])

            in_arr = sds[:]
            if len(in_arr.shape) == 3:
                data = numpy.ndarray(shape=(dim1, dim2), dtype=OUT_DTYPE)
                for idx, band_name in enumerate(OUT_DTYPE.names):
                    data[band_name] = in_arr[:, :, idx]
            else:
                data = in_arr

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                data,
                sds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )

def convert_tile(fname, out_fname, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            # global attributes
            attach_attributes(fid, ds.tags())

            # find and convert every subdataset (sds)
            for sds_name in ds.subdatasets:
                with rasterio.open(sds_name) as sds:
                    ds_name = Path(sds_name.replace(':', '/')).name

                    # create empty or copy the user supplied filter options
                    if not filter_opts:
                        f_opts = dict()
                    else:
                        f_opts = filter_opts.copy()

                    # use sds native chunks if none are provided
                    if 'chunks' not in f_opts:
                        f_opts['chunks'] = list(sds.block_shapes[0])

                    # modify to have 3D chunks if we have a multiband sds
                    if sds.count == 3:
                        # something could go wrong if a user supplies
                        # a 3D chunk eg (2, 256, 340)
                        f_opts['chunks'].insert(0, 1)
                        f_opts['chunks'] = tuple(f_opts['chunks'])
                    else:
                        f_opts['chunks'] = tuple(f_opts['chunks'])

                    # subdataset attributes and spatial attributes
                    attrs = sds.tags()
                    attrs['geotransform'] = sds.transform.to_gdal()
                    attrs['crs_wkt'] = sds.crs.wkt

                    # ensure a single band sds is read as 2D, not 3D
                    data = sds.read() if sds.count == 3 else sds.read(1)

                    # write to disk as an IMAGE Class Dataset
                    write_h5_image(data, ds_name, fid, attrs=attrs,
                                   compression=compression,
                                   filter_opts=f_opts)

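# Illustrative usage sketch only (not part of the original module). The tile
# filename is hypothetical (it follows the usual MCD43A1 naming convention);
# this call targets the rasterio-based variant above, which writes a new
# HDF5 file, whereas the netCDF4-based variant takes an open h5py.Group.
#
#     convert_tile("MCD43A1.A2018001.h31v11.006.hdf", "MCD43A1.h5",
#                  H5CompressionFilter.LZF, None)
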