def do_test(self, test_input):
    """Check sizes and coverage for a list of test input."""
    for (samples, lines, xtile, ytile) in test_input:
        tiles_list = list(generate_tiles(samples, lines, xtile, ytile))
        self.check_sizes(xtile, ytile, tiles_list)
        self.check_tiling(samples, lines, tiles_list)
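# `generate_tiles` itself is not shown in this section. The sketch below is a
# minimal stand-in describing its assumed contract, inferred from the call
# sites: it yields ((ystart, yend), (xstart, xend)) index pairs covering a
# lines x samples image, with edge tiles clipped to the image extents. The
# ytile default and generator behaviour are assumptions, not the project's
# actual implementation.
def generate_tiles(samples, lines, xtile, ytile=None):
    """Yield ((ystart, yend), (xstart, xend)) tiles over a lines x samples grid."""
    # assumption: omitting ytile tiles over the full image height
    ytile = lines if ytile is None else ytile
    for ystart in range(0, lines, ytile):
        yend = min(ystart + ytile, lines)
        for xstart in range(0, samples, xtile):
            xend = min(xstart + xtile, samples)
            yield ((ystart, yend), (xstart, xend))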
def _convert_4d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the multiples of 37 layer
    atmospheric data in the GRIB file to HDF5.
    For a month's worth of data, the dimensions become:

        * (day, atmospheric level, y, x)
    """
    attrs = {
        "geotransform": rds.transform.to_gdal(),
        "crs_wkt": rds.crs.wkt,
        "history": "Converted to HDF5",
    }

    # band groups of 37, nrows to process (ytile)
    band_groups = range(1, rds.count + 1, 37)
    ytile = filter_opts["chunks"][2]
    dims = (len(band_groups), 37, rds.height, rds.width)

    # materialise the tiles; a bare generator would be exhausted after
    # the first band group
    tiles = list(generate_tiles(rds.width, rds.height, rds.width, ytile))

    # dataset creation options
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    kwargs["shape"] = dims
    kwargs["dtype"] = rds.dtypes[0]

    dataset = fid.create_dataset(dataset_name, **kwargs)
    attach_image_attributes(dataset, attrs)

    # add dimension labels, but should we also include dimension scales?
    dataset.dims[0].label = "Day"
    dataset.dims[1].label = "Atmospheric Level"
    dataset.dims[2].label = "Y"
    dataset.dims[3].label = "X"

    # process by spatial tile containing 37 atmospheric layers for 1 day
    for i, bg in enumerate(band_groups):
        bands = list(range(bg, bg + 37))
        for tile in tiles:
            # index the i'th day; the 37 layers fill the second axis
            idx = (
                i,
                slice(None),
                slice(tile[0][0], tile[0][1]),
                slice(tile[1][0], tile[1][1]),
            )
            dataset[idx] = rds.read(bands, window=tile)

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, "METADATA", fid, compression)
def generate_windows(
    shape: Iterable[int], compute_chunks: Iterable[int]
) -> Iterable[Tuple[slice, slice]]:
    """
    Generates windows of height and width equivalent to compute_chunks' shape.

    :param shape: Total (x, y) size to iterate over
    :param compute_chunks: Size of the shape subset to yield at a time
    :return: (y, x) slice pairs subsetting the shape
    """
    for ys, xs in generate_tiles(shape[0], shape[1],
                                 compute_chunks[0], compute_chunks[1]):
        yield (slice(*ys), slice(*xs))
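# Hypothetical usage sketch: iterate (y, x) slice pairs over a 700 x 500
# (x, y) shape in 256 x 256 chunks; the shapes here are illustrative.
for y_slice, x_slice in generate_windows((700, 500), (256, 256)):
    print(y_slice, x_slice)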
def _convert_3d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 37 layer atmospheric data
    in the GRIB file to HDF5.
    """
    # basic metadata to attach to the dataset
    attrs = {
        'geotransform': rds.transform.to_gdal(),
        'crs_wkt': rds.crs.wkt,
        'history': 'Converted to HDF5'
    }

    # bands list, nrows to process (ytile)
    bands = list(range(1, rds.count + 1))
    ytile = filter_opts['chunks'][1]
    dims = (rds.count, rds.height, rds.width)

    # dataset creation options
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    kwargs['shape'] = dims
    kwargs['dtype'] = rds.dtypes[0]

    dataset = fid.create_dataset(dataset_name, **kwargs)
    attach_image_attributes(dataset, attrs)

    # add dimension labels, but should we also include dimension scales?
    dataset.dims[0].label = 'Atmospheric Level'
    dataset.dims[1].label = 'Y'
    dataset.dims[2].label = 'X'

    # process by tile
    for tile in generate_tiles(rds.width, rds.height, rds.width, ytile):
        idx = (
            slice(None),
            slice(tile[0][0], tile[0][1]),
            slice(tile[1][0], tile[1][1])
        )
        dataset[idx] = rds.read(bands, window=tile)

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, 'METADATA', fid, compression)
def convert_vrt(
    fname,
    out_h5: h5py.Group,
    dataset_name="dataset",
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
    attrs=None,
):
    """
    Convert the VRT mosaic to HDF5.
    """
    with rasterio.open(fname) as rds:
        # set default chunks and set dimensions
        if rds.count == 3:
            chunks = (3, 256, 256)
            dims = (3, rds.height, rds.width)
        else:
            chunks = (256, 256)
            dims = (rds.height, rds.width)

        # create empty or copy the user supplied filter options
        if not filter_opts:
            filter_opts = dict()
            filter_opts["chunks"] = chunks
        else:
            filter_opts = filter_opts.copy()

        if "chunks" not in filter_opts:
            filter_opts["chunks"] = chunks

        # modify to have 3D chunks if we have a multiband vrt
        if rds.count == 3 and len(filter_opts["chunks"]) != 3:
            # copy the user's original 2D chunk and insert the third
            chunks = list(filter_opts["chunks"])
            chunks.insert(0, 3)
            filter_opts["chunks"] = chunks

        # dataset attributes
        if attrs:
            attrs = attrs.copy()
        else:
            attrs = {}
        attrs["geotransform"] = rds.transform.to_gdal()
        attrs["crs_wkt"] = rds.crs.wkt
        attrs["nodata"] = rds.nodata

        # dataset creation options
        kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
        kwargs["shape"] = dims
        kwargs["dtype"] = rds.dtypes[0]

        dataset = out_h5.create_dataset(dataset_name, **kwargs)
        attach_image_attributes(dataset, attrs)

        # tiled processing (all cols by chunked rows)
        ytile = (filter_opts["chunks"][1] if rds.count == 3
                 else filter_opts["chunks"][0])
        tiles = generate_tiles(rds.width, rds.height, rds.width, ytile)

        for tile in tiles:
            # numpy index
            if rds.count == 3:
                idx = (
                    slice(None),
                    slice(tile[0][0], tile[0][1]),
                    slice(tile[1][0], tile[1][1]),
                )
            else:
                idx = (slice(tile[0][0], tile[0][1]),
                       slice(tile[1][0], tile[1][1]))

            # ensure single band rds is read as 2D not 3D
            data = (rds.read(window=tile) if rds.count == 3
                    else rds.read(1, window=tile))

            # write
            dataset[idx] = data
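# Hypothetical usage sketch: convert a VRT mosaic into a new HDF5 file; the
# filenames and dataset name are illustrative.
with h5py.File("mosaic.h5", "w") as fid:
    convert_vrt("mosaic.vrt", fid, dataset_name="mosaic")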
def write_img(array, filename, driver='GTiff', geobox=None, nodata=None,
              tags=None, options=None, cogtif=False, levels=None,
              resampling=Resampling.nearest):
    """
    Writes a 2D/3D image to disk using rasterio.

    :param array:
        A 2D/3D NumPy array.

    :param filename:
        A string containing the output file name.

    :param driver:
        A string containing a GDAL compliant image driver. Default is
        'GTiff'.

    :param geobox:
        An instance of a GriddedGeoBox object.

    :param nodata:
        A value representing the no data value for the array.

    :param tags:
        A dictionary of dataset-level metadata.

    :param options:
        A dictionary containing other dataset creation options.
        See creation options for the respective GDAL formats.

    :param cogtif:
        If set to True, override the `driver` keyword with `GTiff`
        and create a Cloud Optimised GeoTiff. Default is False.
        See: https://trac.osgeo.org/gdal/wiki/CloudOptimizedGeoTIFF

    :param levels:
        If cogtif is set to True, build overviews/pyramids according
        to levels. Default levels are [2, 4, 8, 16, 32].

    :param resampling:
        If cogtif is set to True, build overviews/pyramids using a
        resampling method from `rasterio.enums.Resampling`.
        Default is `Resampling.nearest`.

    :notes:
        If array is an instance of a `h5py.Dataset`, then the output
        file will include blocksizes based on the `h5py.Dataset's`
        chunks. To override the blocksizes, specify them using the
        `options` keyword. Eg {'blockxsize': 512, 'blockysize': 512}.
        If `cogtif` is set to True, the default blocksizes will be
        256x256. To override this behaviour, specify them using the
        `options` keyword. Eg {'blockxsize': 512, 'blockysize': 512}.
    """
    # Get the datatype of the array
    dtype = array.dtype.name

    # Check for excluded datatypes
    excluded_dtypes = ['int64', 'int8', 'uint64']
    if dtype in excluded_dtypes:
        msg = "Datatype not supported: {dt}".format(dt=dtype)
        raise TypeError(msg)

    # convert any bools to uint8
    if dtype == 'bool':
        array = np.uint8(array)
        dtype = 'uint8'

    ndims = array.ndim
    dims = array.shape

    # Get the (z, y, x) dimensions (assuming BSQ interleave)
    if ndims == 2:
        samples = dims[1]
        lines = dims[0]
        bands = 1
    elif ndims == 3:
        samples = dims[2]
        lines = dims[1]
        bands = dims[0]
    else:
        logging.error('Input array is not of 2 or 3 dimensions!!!')
        err = 'Array dimensions: {dims}'.format(dims=ndims)
        raise IndexError(err)

    # If we have a geobox, then retrieve the geotransform and projection
    if geobox is not None:
        transform = geobox.transform
        projection = geobox.crs.ExportToWkt()
    else:
        transform = None
        projection = None

    # override the driver if we are creating a cogtif
    if cogtif:
        driver = 'GTiff'

    # compression predictor choices
    predictor = {'int8': 2, 'uint8': 2, 'int16': 2, 'uint16': 2,
                 'int32': 2, 'uint32': 2, 'int64': 2, 'uint64': 2,
                 'float32': 3, 'float64': 3}

    kwargs = {'count': bands, 'width': samples, 'height': lines,
              'crs': projection, 'transform': transform, 'dtype': dtype,
              'driver': driver, 'nodata': nodata,
              'predictor': predictor[dtype]}

    if isinstance(array, h5py.Dataset):
        # TODO: if array is 3D get x & y chunks
        if array.chunks[1] == array.shape[1]:
            # GDAL doesn't like tiled or blocksize options to be set
            # the same length as the columns (probably true for rows as well)
            array = array[:]
        else:
            y_tile, x_tile = array.chunks
            tiles = generate_tiles(samples, lines, x_tile, y_tile)

            # add blocksizes to the creation keywords
            kwargs['tiled'] = 'yes'
            kwargs['blockxsize'] = x_tile
            kwargs['blockysize'] = y_tile

    # the user can override any derived blocksizes by supplying `options`
    if options is not None:
        for key in options:
            kwargs[key] = options[key]

    with tempfile.TemporaryDirectory() as tmpdir:
        out_fname = pjoin(tmpdir, basename(filename)) if cogtif else filename

        with rasterio.open(out_fname, 'w', **kwargs) as outds:
            if bands == 1:
                if isinstance(array, h5py.Dataset):
                    for tile in tiles:
                        idx = (slice(tile[0][0], tile[0][1]),
                               slice(tile[1][0], tile[1][1]))
                        outds.write(array[idx], 1, window=tile)
                else:
                    outds.write(array, 1)
            else:
                if isinstance(array, h5py.Dataset):
                    for tile in tiles:
                        idx = (slice(tile[0][0], tile[0][1]),
                               slice(tile[1][0], tile[1][1]))
                        subs = array[:, idx[0], idx[1]]
                        for i in range(bands):
                            outds.write(subs[i], i + 1, window=tile)
                else:
                    for i in range(bands):
                        outds.write(array[i], i + 1)
            if tags is not None:
                outds.update_tags(**tags)

            # overviews/pyramids
            if cogtif:
                if levels is None:
                    levels = [2, 4, 8, 16, 32]
                outds.build_overviews(levels, resampling)

        if cogtif:
            cmd = ['gdal_translate',
                   '-co', 'TILED=YES',
                   '-co', 'COPY_SRC_OVERVIEWS=YES',
                   '-co', '{}={}'.format('PREDICTOR', predictor[dtype])]

            # guard against `options` being None before iterating
            if options is not None:
                for key, value in options.items():
                    cmd.extend(['-co', '{}={}'.format(key, value)])

            cmd.extend([out_fname, filename])

            subprocess.check_call(cmd, cwd=dirname(filename))
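# Hypothetical usage sketch for write_img: write a random float32 array as a
# deflate-compressed GeoTIFF. Without a geobox the output carries no
# georeferencing; the filename and options are illustrative.
data = np.random.ranf((512, 512)).astype('float32')
write_img(data, 'example.tif', nodata=-999,
          options={'compress': 'deflate'})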
def convert_file(fname, out_fname, group_name='/', dataset_name='dataset',
                 compression=H5CompressionFilter.LZF, filter_opts=None,
                 attrs=None):
    """
    Convert generic single band image file to HDF5.
    Processes in a tiled fashion to minimise memory use.
    Will process all columns by n (default 256) rows at a time,
    where n can be specified via command line using:
    --filter-opts '{"chunks": (n, xsize)}'

    :param fname:
        A str containing the raster filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param group_name:
        A str containing the HDF5 group name in which to place the
        dataset.

    :param dataset_name:
        A str containing the dataset name to use in the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :param attrs:
        A dict containing any attribute information to be attached
        to the HDF5 Dataset.

    :return:
        None. Content is written directly to disk.
    """
    # opening in `append` mode allows us to add additional datasets
    with h5py.File(out_fname, 'a') as fid:
        with rasterio.open(fname) as ds:
            # create empty or copy the user supplied filter options
            if not filter_opts:
                filter_opts = dict()
            else:
                filter_opts = filter_opts.copy()

            # use sds native chunks if none are provided
            if 'chunks' not in filter_opts:
                filter_opts['chunks'] = (256, 256)

            # read all cols for n rows (ytile), as the GA's DEM is
            # BSQ interleaved
            ytile = filter_opts['chunks'][0]

            # dataset attributes
            if attrs:
                attrs = attrs.copy()
            else:
                attrs = {}
            attrs['geotransform'] = ds.transform.to_gdal()
            attrs['crs_wkt'] = ds.crs.wkt

            # dataset creation options
            kwargs = compression.config(
                **filter_opts).dataset_compression_kwargs()
            kwargs['shape'] = (ds.height, ds.width)
            kwargs['dtype'] = ds.dtypes[0]

            dataset_name = ppjoin(group_name, dataset_name)
            dataset = fid.create_dataset(dataset_name, **kwargs)
            attach_image_attributes(dataset, attrs)

            # process each tile
            for tile in generate_tiles(ds.width, ds.height, ds.width, ytile):
                idx = (slice(tile[0][0], tile[0][1]),
                       slice(tile[1][0], tile[1][1]))
                data = ds.read(1, window=tile)
                dataset[idx] = data
def convert_file(
    fname: Path,
    out_h5: h5py.Group,
    out_dataset_path: str = "SWFO-DSM",
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
    attrs=None,
):
    """
    Convert generic single band image file to HDF5.
    Processes in a tiled fashion to minimise memory use.
    Will process all columns by n (default 256) rows at a time,
    where n can be specified via command line using:
    --filter-opts '{"chunks": (n, xsize)}'

    :param fname:
        A pathlib.Path containing the raster filename.

    :param out_h5:
        A writeable h5py `Group` in which to create the output dataset.

    :param out_dataset_path:
        A str containing the dataset name to use in the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :param attrs:
        A dict containing any attribute information to be attached
        to the HDF5 Dataset.

    :return:
        A list containing a single metadata dict, and a list containing
        the output dataset path. Content is written directly to disk.
    """
    with rasterio.open(str(fname), "r") as ds:
        # create empty or copy the user supplied filter options
        if not filter_opts:
            filter_opts = dict()
        else:
            filter_opts = filter_opts.copy()

        # use sds native chunks if none are provided
        if "chunks" not in filter_opts:
            filter_opts["chunks"] = (min(256, ds.height), min(256, ds.width))

        # read all cols for n rows (ytile), as the GA's DEM is BSQ interleaved
        ytile = filter_opts["chunks"][0]

        # dataset attributes
        if attrs:
            attrs = attrs.copy()
        else:
            attrs = {}
        attrs["geotransform"] = ds.transform.to_gdal()
        attrs["crs_wkt"] = ds.crs.wkt

        # dataset creation options
        kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
        kwargs["shape"] = (ds.height, ds.width)
        kwargs["dtype"] = ds.dtypes[0]

        dataset = out_h5.create_dataset(out_dataset_path, **kwargs)
        attach_image_attributes(dataset, attrs)

        # process each tile
        for tile in generate_tiles(ds.width, ds.height, ds.width, ytile):
            idx = (slice(tile[0][0], tile[0][1]),
                   slice(tile[1][0], tile[1][1]))
            data = ds.read(1, window=tile)
            dataset[idx] = data

        assert ds.count == 1  # the checksum call assumes a single band image
        metadata = {
            "id": str(
                generate_fallback_uuid(
                    PRODUCT_HREF, path=str(fname.stem), checksum=ds.checksum(1)
                )
            )
        }

    return [metadata], [out_dataset_path]
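# Hypothetical usage sketch: convert a single-band GeoTIFF DSM into an open
# HDF5 file. The filenames are illustrative, and pathlib.Path is assumed to
# be imported (the signature above already uses it).
with h5py.File("dsm.h5", "w") as fid:
    metadata, dataset_paths = convert_file(Path("dsm.tif"), fid)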
def mndwi(wagl_h5_file, granule, out_fname):
    """
    Computes the mndwi for a given granule in a wagl h5 file.

    Parameters
    ----------
    wagl_h5_file : str
        wagl-water-atcor generated h5 file

    granule : str
        Group path of the granule within the h5 file

    out_fname : str
        Output filename of the h5 file
    """
    # specify the reflectance products to use in generating mndwi
    products = ["LMBADJ"]

    # specify the resampling approach for the SWIR band
    resample_approach = Resampling.bilinear

    h5_fid = h5py.File(out_fname, "w")

    # find the granule index in the wagl_h5_file
    fid = h5py.File(wagl_h5_file, "r")
    granule_fid = fid[granule]
    paths = find(granule_fid, "IMAGE")

    # get platform name
    md = yaml.load(fid[granule + "/METADATA/CURRENT"][()],
                   Loader=yaml.FullLoader)
    platform_id = md["source_datasets"]["platform_id"]

    # store mndwi-based products into a group
    mndwi_grp = h5_fid.create_group("mndwi")

    for i, prod in enumerate(products):
        # search the h5 groups & get paths to the green and swir bands
        green_path, swir_path = get_mndwi_bands(granule, platform_id, prod,
                                                paths)

        green_ds = granule_fid[green_path]
        chunks = green_ds.chunks
        nRows, nCols = green_ds.shape
        geobox = GriddedGeoBox.from_dataset(green_ds)
        nodata = green_ds.attrs["no_data_value"]

        # create output h5 attributes
        desc = "MNDWI derived with {0} and {1} ({2} reflectances)".format(
            psplit(green_path)[-1],
            psplit(swir_path)[-1],
            prod,
        )

        attrs = {
            "crs_wkt": geobox.crs.ExportToWkt(),
            "geotransform": geobox.transform.to_gdal(),
            "no_data_value": nodata,
            "granule": granule,
            "description": desc,
            "platform": platform_id,
            "spatial_resolution": abs(geobox.transform.a),
        }

        if platform_id.startswith("SENTINEL_2"):
            # we need to upscale the swir band
            swir_ds = granule_fid[swir_path]
            swir_im = reproject_array_to_array(
                src_img=swir_ds[:],
                src_geobox=GriddedGeoBox.from_dataset(swir_ds),
                dst_geobox=geobox,
                src_nodata=swir_ds.attrs["no_data_value"],
                dst_nodata=nodata,
                resampling=resample_approach,
            )
            attrs["SWIR_resampling_method"] = resample_approach.name
        else:
            swir_im = granule_fid[swir_path][:]

        # ------------------------- #
        #  Compute mndwi via tiles  #
        #    and save tiles to h5   #
        # ------------------------- #
        # samples = columns (nCols) and lines = rows (nRows)
        tiles = generate_tiles(samples=nCols, lines=nRows,
                               xtile=chunks[1], ytile=chunks[0])

        # create mndwi dataset
        mndwi_ds = mndwi_grp.create_dataset(
            f"mndwi_image_{prod}",
            shape=(nRows, nCols),
            dtype="float32",
            compression="lzf",
            chunks=chunks,
            shuffle=True,
        )

        for tile in tiles:
            # convert the tile boundaries into slices for indexing
            idx = (slice(tile[0][0], tile[0][1]),
                   slice(tile[1][0], tile[1][1]))

            green_tile = green_ds[idx]
            swir_tile = swir_im[idx]
            mndwi_tile = compute_mndwi(green_tile, swir_tile)

            # perform masking
            mask = (
                (green_tile == nodata)
                | (swir_tile == nodata)
                | (~np.isfinite(mndwi_tile))
            )
            mndwi_tile[mask] = nodata

            mndwi_ds[idx] = mndwi_tile

        # add attrs to dataset
        attach_image_attributes(mndwi_ds, attrs)

    fid.close()
    h5_fid.close()
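# `compute_mndwi` is not shown in this section. A minimal sketch, assuming
# the standard normalised-difference form (green - swir) / (green + swir);
# the caller above masks out any non-finite results from zero denominators.
def compute_mndwi(green_im, swir_im):
    """Return the Modified NDWI of the green and SWIR arrays."""
    green = green_im.astype("float32")
    swir = swir_im.astype("float32")
    # suppress divide-by-zero warnings; the caller masks non-finite values
    with np.errstate(divide="ignore", invalid="ignore"):
        return (green - swir) / (green + swir)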
def write_img(array, filename, driver='GTiff', geobox=None, nodata=None,
              tags=None, options=None, levels=None,
              resampling=Resampling.nearest, config_options=None):
    """
    Writes a 2D/3D image to disk using rasterio.

    :param array:
        A 2D/3D NumPy array.

    :param filename:
        A string containing the output file name.

    :param driver:
        A string containing a GDAL compliant image driver. Default is
        'GTiff'.

    :param geobox:
        An instance of a GriddedGeoBox object.

    :param nodata:
        A value representing the no data value for the array.

    :param tags:
        A dictionary of dataset-level metadata.

    :param options:
        A dictionary containing other dataset creation options.
        See creation options for the respective GDAL formats.

    :param levels:
        Build overviews/pyramids according to levels.

    :param resampling:
        If levels is set, build overviews using a resampling method
        from `rasterio.enums.Resampling`.
        Default is `Resampling.nearest`.

    :param config_options:
        A dictionary containing the options to configure GDAL's
        environment's default configurations.

    :notes:
        If array is an instance of a `h5py.Dataset`, then the output
        file will include blocksizes based on the `h5py.Dataset's`
        chunks. To override the blocksizes, specify them using the
        `options` keyword. Eg {'blockxsize': 512, 'blockysize': 512}.
    """
    # Get the datatype of the array
    dtype = array.dtype.name

    # Check for excluded datatypes
    excluded_dtypes = ['int64', 'int8', 'uint64']
    if dtype in excluded_dtypes:
        msg = "Datatype not supported: {dt}".format(dt=dtype)
        raise TypeError(msg)

    # convert any bools to uint8
    if dtype == 'bool':
        array = np.uint8(array)
        dtype = 'uint8'

    ndims = array.ndim
    dims = array.shape

    # Get the (z, y, x) dimensions (assuming BSQ interleave)
    if ndims == 2:
        samples = dims[1]
        lines = dims[0]
        bands = 1
    elif ndims == 3:
        samples = dims[2]
        lines = dims[1]
        bands = dims[0]
    else:
        _LOG.error('Input array is not of 2 or 3 dimensions!!!')
        err = 'Array dimensions: {dims}'.format(dims=ndims)
        raise IndexError(err)

    # If we have a geobox, then retrieve the geotransform and projection
    if geobox is not None:
        transform = geobox.transform
        projection = geobox.crs.ExportToWkt()
    else:
        transform = None
        projection = None

    # compression predictor choices
    predictor = {'int8': 2, 'uint8': 2, 'int16': 2, 'uint16': 2,
                 'int32': 2, 'uint32': 2, 'int64': 2, 'uint64': 2,
                 'float32': 3, 'float64': 3}

    kwargs = {'count': bands, 'width': samples, 'height': lines,
              'crs': projection, 'transform': transform, 'dtype': dtype,
              'driver': driver, 'nodata': nodata,
              'predictor': predictor[dtype]}

    # handle the case where no options are provided, before `options`
    # is queried for blocksizes below
    options = options or {}

    if isinstance(array, h5py.Dataset):
        # TODO: if array is 3D get x & y chunks
        if array.chunks[1] == array.shape[1]:
            # GDAL doesn't like tiled or blocksize options to be set
            # the same length as the columns (probably true for rows as well)
            array = array[:]
        else:
            y_tile, x_tile = array.chunks
            tiles = generate_tiles(samples, lines, x_tile, y_tile)

            if 'tiled' in options:
                kwargs['blockxsize'] = options.pop('blockxsize', x_tile)
                kwargs['blockysize'] = options.pop('blockysize', y_tile)

    # the user can override any derived blocksizes by supplying `options`
    for key in options:
        kwargs[key] = options[key]

    def _rasterio_write_raster(filename):
        """
        This is a wrapper around rasterio writing tiles to
        enable writing to a temporary location before rearranging
        the overviews within the file by gdal when required
        """
        with rasterio.open(filename, 'w', **kwargs) as outds:
            if bands == 1:
                if isinstance(array, h5py.Dataset):
                    for tile in tiles:
                        idx = (slice(tile[0][0], tile[0][1]),
                               slice(tile[1][0], tile[1][1]))
                        outds.write(array[idx], 1, window=tile)
                else:
                    outds.write(array, 1)
            else:
                if isinstance(array, h5py.Dataset):
                    for tile in tiles:
                        idx = (slice(tile[0][0], tile[0][1]),
                               slice(tile[1][0], tile[1][1]))
                        subs = array[:, idx[0], idx[1]]
                        for i in range(bands):
                            outds.write(subs[i], i + 1, window=tile)
                else:
                    for i in range(bands):
                        outds.write(array[i], i + 1)
            if tags is not None:
                outds.update_tags(**tags)

            # overviews/pyramids to disk
            if levels:
                outds.build_overviews(levels, resampling)

    if not levels:
        # write directly to disk without rewriting with gdal
        _rasterio_write_raster(filename)
    else:
        with tempfile.TemporaryDirectory() as tmpdir:
            out_fname = pjoin(tmpdir, basename(filename))

            # first write to a temporary location
            _rasterio_write_raster(out_fname)

            # Creates the file at filename with the configured options
            # Will also move the overviews to the start of the file
            cmd = ['gdal_translate',
                   '-co', '{}={}'.format('PREDICTOR', predictor[dtype])]

            for key, value in options.items():
                cmd.extend(['-co', '{}={}'.format(key, value)])

            if config_options:
                for key, value in config_options.items():
                    cmd.extend(['--config', '{}'.format(key),
                                '{}'.format(value)])

            cmd.extend([out_fname, filename])

            subprocess.check_call(cmd, cwd=dirname(filename))
def exiting_angles(satellite_solar_group, slope_aspect_group, out_group=None,
                   compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Calculates the exiting angle and the azimuthal exiting angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the satellite view and
        satellite azimuth datasets specified by the pathnames given by:

        * DatasetName.SATELLITE_VIEW
        * DatasetName.SATELLITE_AZIMUTH

    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect datasets
        specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset names will be as follows:

        * DatasetName.EXITING
        * DatasetName.AZIMUTHAL_EXITING

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SATELLITE_VIEW.value
    satellite_view_dataset = satellite_solar_group[dname]
    dname = DatasetName.SATELLITE_AZIMUTH.value
    satellite_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(satellite_view_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('exiting-angles.h5', 'w', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.EXITING_GROUP.value not in fid:
        fid.create_group(GroupName.EXITING_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.EXITING_GROUP.value]
    tile_size = satellite_view_dataset.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = -999
    kwargs['shape'] = shape
    kwargs['fillvalue'] = no_data
    kwargs['dtype'] = 'float32'

    # output datasets
    dataset_name = DatasetName.EXITING.value
    exiting_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_EXITING.value
    azi_exit_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': crs,
        'geotransform': geobox.transform.to_gdal(),
        'no_data_value': no_data
    }
    desc = "Contains the exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'exiting'
    attach_image_attributes(exiting_dset, attrs)

    desc = "Contains the azimuthal exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'azimuthal-exiting'
    attach_image_attributes(azi_exit_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sat_view = as_array(satellite_view_dataset[idx],
                            dtype=numpy.float32, transpose=True)
        sat_azi = as_array(satellite_azimuth_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32, transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32, transpose=True)

        # Initialise the work arrays
        exiting = numpy.zeros((ysize, xsize), dtype='float32')
        azi_exiting = numpy.zeros((ysize, xsize), dtype='float32')

        # Process the current tile
        exiting_angle(xsize, ysize, sat_view, sat_azi, slope, aspect,
                      exiting.transpose(), azi_exiting.transpose())

        # Write the current tile to disk
        exiting_dset[idx] = exiting
        azi_exit_dset[idx] = azi_exiting

    if out_group is None:
        return fid
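# Hypothetical usage sketch: build the exiting angle datasets in-memory (the
# default), given already-opened input groups; `sat_sol_grp` and
# `slope_aspect_grp` are illustrative names for such groups.
fid = exiting_angles(sat_sol_grp, slope_aspect_grp)
exiting_dset = fid[GroupName.EXITING_GROUP.value][DatasetName.EXITING.value]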
def calc_contiguity_mask(acquisitions, platform_id):
    """
    Determines locations of null values.

    Null values for every band are located in order to create band
    contiguity.

    :param acquisitions:
        A `list` of `acquisition` objects.

    :param platform_id:
        A `str` containing the platform id as given by
        `acquisition.platform_id`.

    :return:
        A single ndarray determining band/pixel contiguity. 1 for
        contiguous, 0 for non-contiguous.

    :notes:
        Attempts to flag thermal anomalies for Landsat 5TM as well.
    """
    cols = acquisitions[0].samples
    rows = acquisitions[0].lines
    tiles = list(generate_tiles(cols, rows, cols))

    logging.debug('Determining pixel contiguity')
    # Create mask array with True for all pixels which are non-zero in all
    # bands
    mask = numpy.zeros((rows, cols), dtype='bool')
    for tile in tiles:
        idx = (slice(tile[0][0], tile[0][1]),
               slice(tile[1][0], tile[1][1]))
        stack, _ = stack_data(acquisitions, window=tile)
        mask[idx] = stack.all(0)

    # The following is only valid for Landsat 5 images
    logging.debug('calc_contiguity_mask: platform_id=%s', platform_id)
    if platform_id == 'LANDSAT_5':
        logging.debug('Finding thermal edge anomalies')
        # Apply thermal edge anomalies
        struct = numpy.ones((7, 7), dtype='bool')
        erode = ndimage.binary_erosion(mask, structure=struct)

        dims = mask.shape
        th_anom = numpy.zeros(dims, dtype='bool').flatten()

        # subtract as uint8, since numpy disallows subtraction of
        # boolean arrays
        pix_3buff_mask = mask.astype('uint8') - erode.astype('uint8')
        pix_3buff_mask[pix_3buff_mask > 0] = 1
        edge = pix_3buff_mask == 1

        low_sat = acquisitions[5].data() == 1
        low_sat_buff = ndimage.binary_dilation(low_sat, structure=struct)

        s = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
        low_sat, _ = ndimage.label(low_sat_buff, structure=s)

        labels = low_sat[edge]
        ulabels = numpy.unique(labels[labels > 0])

        # Histogram method, a lot faster
        mx = numpy.max(ulabels)
        h = histogram(low_sat, minv=0, maxv=mx, reverse_indices='ri')
        hist = h['histogram']
        ri = h['ri']

        for i in numpy.arange(ulabels.shape[0]):
            if hist[ulabels[i]] == 0:
                continue
            th_anom[ri[ri[ulabels[i]]:ri[ulabels[i] + 1]]] = True

        th_anom = ~(th_anom.reshape(dims))
        mask &= th_anom

    return mask
def test_exception_1(self):
    """Test empty image."""
    for (samples, lines, xtile, ytile) in self.exception_input1:
        tiles_list = list(generate_tiles(samples, lines, xtile, ytile))
        self.assertEqual(tiles_list, [], "Expected an empty tile list.")
def incident_angles(
    satellite_solar_group,
    slope_aspect_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the incident angle and the azimuthal incident angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the solar zenith and
        solar azimuth datasets specified by the pathnames given by:

        * DatasetName.SOLAR_ZENITH
        * DatasetName.SOLAR_AZIMUTH

    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect datasets
        specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset names will be as follows:

        * DatasetName.INCIDENT
        * DatasetName.AZIMUTHAL_INCIDENT

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SOLAR_ZENITH.value
    solar_zenith_dataset = satellite_solar_group[dname]
    dname = DatasetName.SOLAR_AZIMUTH.value
    solar_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(solar_zenith_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("incident-angles.h5", "w", driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.INCIDENT_GROUP.value not in fid:
        fid.create_group(GroupName.INCIDENT_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.INCIDENT_GROUP.value]
    tile_size = solar_zenith_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    dataset_name = DatasetName.INCIDENT.value
    incident_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_INCIDENT.value
    azi_inc_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = "Contains the incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "incident"
    attach_image_attributes(incident_dset, attrs)

    desc = "Contains the azimuthal incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "azimuthal-incident"
    attach_image_attributes(azi_inc_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sol_zen = as_array(solar_zenith_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        sol_azi = as_array(solar_azimuth_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32, transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32, transpose=True)

        # Initialise the work arrays
        incident = numpy.zeros((ysize, xsize), dtype="float32")
        azi_incident = numpy.zeros((ysize, xsize), dtype="float32")

        # Process the current tile
        incident_angle(
            xsize,
            ysize,
            sol_zen,
            sol_azi,
            slope,
            aspect,
            incident.transpose(),
            azi_incident.transpose(),
        )

        # Write the current tile to disk
        incident_dset[idx] = incident
        azi_inc_dset[idx] = azi_incident

    if out_group is None:
        return fid
def relative_azimuth_slope(
    incident_angles_group,
    exiting_angles_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the relative azimuth angle on the slope surface.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the azimuthal incident
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the azimuthal exiting
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_EXITING

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset names will be as follows:

        * DatasetName.RELATIVE_SLOPE

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.AZIMUTHAL_INCIDENT.value
    azimuth_incident_dataset = incident_angles_group[dname]
    dname = DatasetName.AZIMUTHAL_EXITING.value
    azimuth_exiting_dataset = exiting_angles_group[dname]

    geobox = GriddedGeoBox.from_dataset(azimuth_incident_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("relative-azimuth-angles.h5", "w", driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.REL_SLP_GROUP.value not in fid:
        fid.create_group(GroupName.REL_SLP_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.REL_SLP_GROUP.value]
    tile_size = azimuth_incident_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    out_dset = grp.create_dataset(DatasetName.RELATIVE_SLOPE.value, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = ("Contains the relative azimuth angles on the slope surface in "
            "degrees.")
    attrs["description"] = desc
    attrs["alias"] = "relative-slope"
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        azi_inc = azimuth_incident_dataset[idx]
        azi_exi = azimuth_exiting_dataset[idx]

        # Process the tile
        rel_azi = azi_inc - azi_exi
        rel_azi[rel_azi <= -180.0] += 360.0
        rel_azi[rel_azi > 180.0] -= 360.0

        # Write the current tile to disk
        out_dset[idx] = rel_azi

    if out_group is None:
        return fid
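# Worked example of the wrap-around normalisation used above: azimuthal
# differences are folded back into the interval (-180, 180].
rel = numpy.array([-350.0, 270.0, 180.0])
rel[rel <= -180.0] += 360.0
rel[rel > 180.0] -= 360.0
# rel is now [10.0, -90.0, 180.0]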
def combine_shadow_masks(self_shadow_group, cast_shadow_sun_group,
                         cast_shadow_satellite_group, out_group=None,
                         compression=H5CompressionFilter.LZF,
                         filter_opts=None):
    """
    A convenience function for combining the shadow masks into a
    single boolean array.

    :param self_shadow_group:
        The root HDF5 `Group` that contains the self shadow dataset
        specified by the pathname given by:

        * DatasetName.SELF_SHADOW

    :param cast_shadow_sun_group:
        The root HDF5 `Group` that contains the cast shadow
        (solar direction) dataset specified by the pathname given by:

        * DatasetName.CAST_SHADOW_FMT

    :param cast_shadow_satellite_group:
        The root HDF5 `Group` that contains the cast shadow
        (satellite direction) dataset specified by the pathname given by:

        * DatasetName.CAST_SHADOW_FMT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset names will be given by the format string detailed
        by:

        * DatasetName.COMBINED_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # access the datasets
    dname_fmt = DatasetName.CAST_SHADOW_FMT.value
    self_shad = self_shadow_group[DatasetName.SELF_SHADOW.value]
    cast_sun = cast_shadow_sun_group[dname_fmt.format(source='SUN')]
    dname = dname_fmt.format(source='SATELLITE')
    cast_sat = cast_shadow_satellite_group[dname]
    geobox = GriddedGeoBox.from_dataset(self_shad)

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('combined-shadow.h5', 'w', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    grp = fid[GroupName.SHADOW_GROUP.value]
    tile_size = cast_sun.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    out_dset = grp.create_dataset(DatasetName.COMBINED_SHADOW.value, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = ("Combined shadow masks: 1. self shadow, "
            "2. cast shadow (solar direction), "
            "3. cast shadow (satellite direction).")
    attrs['description'] = desc
    attrs['mask_values'] = "False = Shadow; True = Non Shadow"
    attrs['alias'] = 'terrain-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        out_dset[idx] = (self_shad[idx] & cast_sun[idx] & cast_sat[idx])

    if out_group is None:
        return fid
def self_shadow(incident_angles_group, exiting_angles_group, out_group=None,
                compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Computes the self shadow mask.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the incident angle dataset
        specified by the pathname given by:

        * DatasetName.INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the exiting angle dataset
        specified by the pathname given by:

        * DatasetName.EXITING

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.
        The dataset name will be given by:

        * DatasetName.SELF_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    incident_angle = incident_angles_group[DatasetName.INCIDENT.value]
    exiting_angle = exiting_angles_group[DatasetName.EXITING.value]
    geobox = GriddedGeoBox.from_dataset(incident_angle)

    # Initialise the output file
    if out_group is None:
        fid = h5py.File('self-shadow.h5', 'w', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    grp = fid[GroupName.SHADOW_GROUP.value]

    tile_size = exiting_angle.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    dataset_name = DatasetName.SELF_SHADOW.value
    out_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = "Self shadow mask derived using the incident and exiting angles."
    attrs['description'] = desc
    attrs['alias'] = 'self-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        inc = numpy.radians(incident_angle[idx])
        exi = numpy.radians(exiting_angle[idx])

        # Process the tile
        mask = numpy.ones(inc.shape, dtype='uint8')
        mask[numpy.cos(inc) <= 0.0] = 0
        mask[numpy.cos(exi) <= 0.0] = 0

        # Write the current tile to disk
        out_dset[idx] = mask

    if out_group is None:
        return fid