def _write_cogtif(dataset, out_fname):
    """
    Easy wrapper for writing a cogtif, that takes care of datasets
    that are written row by row rather than square(ish) blocks.
    """
    if dataset.chunks[1] == dataset.shape[1]:
        blockxsize = 512
        blockysize = 512
        data = dataset[:]
    else:
        blockysize, blockxsize = dataset.chunks
        data = dataset

    options = {
        'blockxsize': blockxsize,
        'blockysize': blockysize,
        'compress': 'deflate',
        'zlevel': 4,
    }

    nodata = dataset.attrs.get('no_data_value')
    geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(data, out_fname, cogtif=True, levels=LEVELS, nodata=nodata,
              geobox=geobox, resampling=Resampling.nearest, options=options)
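# Hedged usage sketch (not from the source): shows the chunk test that
# drives _write_cogtif's two paths. A dataset chunked one row at a time
# (chunks[1] == shape[1]) is read fully into memory so GDAL can retile it
# into 512x512 blocks; block-chunked datasets are streamed as-is. The file
# and dataset names below are hypothetical.
import h5py

with h5py.File("scene.h5", "r") as fid:          # hypothetical file
    ds = fid["NBAR/BAND-2"]                      # hypothetical dataset
    print("row-chunked:", ds.chunks[1] == ds.shape[1])
    _write_cogtif(ds, "/tmp/NBAR_BAND-2.tif")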
def contiguity(fname, output):
    """
    Write a contiguity mask file based on the intersection of valid
    data pixels across all bands from the input file, and write it
    to the specified output path.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)
        yblock, xblock = ds.block_shapes[0]
        ones = np.ones((ds.height, ds.width), dtype='uint8')
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    co_options = {
        'compress': 'deflate',
        'zlevel': 4,
        'blockxsize': xblock,
        'blockysize': yblock,
    }
    write_img(ones, output, cogtif=True, levels=[2, 4, 8, 16, 32],
              geobox=geobox, options=co_options)

    return None
def unpack_dataset(product_group, product_name, band):
    # NOTE: `outdir`, `mask` and `options` are free variables here; this
    # function appears to be defined within an enclosing scope that
    # provides them
    dataset = product_group[band]

    # human readable band name
    band_name = dataset.attrs["alias"]

    out_file = pjoin(outdir, "{}_{}.tif".format(product_name, band_name))
    count_file = pjoin(
        outdir, "{}_{}_valid_pixel_count.tif".format(product_name, band_name))
    nodata = dataset.attrs.get("no_data_value")
    geobox = GriddedGeoBox.from_dataset(dataset)

    data, count = sum_and_count(product_group, mask, band_name)

    # calculate the mean from sum and count
    mean = data / count
    mean[count == 0] = nodata
    mean = mean.astype("int16")

    write_img(mean, out_file, nodata=nodata, geobox=geobox, options=options)
    write_img(count, count_file, nodata=0, geobox=geobox, options=options)
def get_land_sea_mask(gridded_geo_box,
                      ancillary_path='/g/data/v10/eoancillarydata/Land_Sea_Rasters'):
    """
    Return a land/sea 2D numpy boolean array in which Land = True,
    Sea = False for the supplied GriddedGeoBox, using the UTM
    projected data in the supplied ancillary_path.

    If the specified gridded_geo_box has a non-UTM CRS or a non-native
    sample frequency, the data will be reprojected/resampled into the
    gridded_geo_box.
    """
    # get lat/long of geo_box origin
    to_crs = osr.SpatialReference()
    to_crs.SetFromUserInput('EPSG:4326')
    origin_longlat = gridded_geo_box.transform_coordinates(
        gridded_geo_box.origin, to_crs)

    # get Land/Sea data file for this bounding box
    utmZone = abs(get_utm_zone(origin_longlat))
    utmDataPath = '%s/WORLDzone%d.tif' % (ancillary_path, utmZone)

    # read the land/sea data
    with rio.open(utmDataPath) as ds:
        # get the gridded box for the full dataset extent
        landSeaDataGGB = GriddedGeoBox.from_dataset(ds)

        # read the subset covering the requested gridded_geo_box
        window = landSeaDataGGB.window(gridded_geo_box)
        out = numpy.zeros(gridded_geo_box.shape, dtype=numpy.uint8)
        ds.read(1, window=window, out=out)

        return out
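# Hedged sketch (not the source's get_utm_zone): the standard UTM zone
# number is derivable from longitude alone, which is presumably what a
# helper like get_utm_zone does with the (lon, lat) origin. Southern
# hemisphere zones are sometimes flagged with a negative sign, which would
# explain the abs() call above.
def utm_zone_from_longitude(lon):
    # zones are 6 degrees wide, numbered 1..60 starting at 180W
    return int((lon + 180) / 6) % 60 + 1

assert utm_zone_from_longitude(149.1) == 55   # e.g. Canberra sits in zone 55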
def calculate_average(dataframe):
    """
    Given a dataframe with the columns:
        * filename
        * band_name

    Calculate the 3D/timeseries average from all input records.
    Each 2D dataset has dimensions (73y, 144x), and type float32.
    """
    dims = (dataframe.shape[0], 73, 144)
    data = numpy.zeros(dims, dtype="float32")

    # load all data into 3D array (dims are small so just read all)
    for i, rec in enumerate(dataframe.iterrows()):
        row = rec[1]
        with h5py.File(row.filename, "r") as fid:
            ds = fid[row.band_name]
            ds.read_direct(data[i])
            no_data = float(ds.attrs['missing_value'])

        # check for nodata and convert to nan
        # do this for each dataset in case the nodata value changes
        data[i][data[i] == no_data] = numpy.nan

    # get the geobox, chunks (from the last record read;
    # the dataframe only defines filename and band_name columns)
    with h5py.File(row.filename, "r") as fid:
        ds = fid[row.band_name]
        geobox = GriddedGeoBox.from_dataset(ds)
        chunks = ds.chunks

    mean = numpy.nanmean(data, axis=0)

    return mean, geobox, chunks
def write_tif_from_dataset(dataset, out_fname, options, config_options,
                           overviews=True, nodata=None, geobox=None):
    """
    Write an HDF5 dataset or numpy array to a tif file.

    :param dataset:
        h5 dataset containing a numpy array or numpy array
        Dataset will map to the raster data

    :param out_fname:
        destination of the tif

    :param options:
        dictionary of options provided to gdal

    :param config_options:
        dictionary of configurations provided to gdal

    :param overviews:
        boolean flag to create overviews
        default (True)

    :param nodata:
        optional override for the dataset's no data value

    :param geobox:
        optional GriddedGeoBox; derived from the dataset if not supplied

    returns the out_fname param
    """
    if hasattr(dataset, "chunks"):
        data = dataset[:]
    else:
        data = dataset

    if nodata is None and hasattr(dataset, "attrs"):
        nodata = dataset.attrs.get("no_data_value")
    if geobox is None:
        geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(
        data,
        out_fname,
        levels=LEVELS,
        nodata=nodata,
        geobox=geobox,
        resampling=Resampling.average,
        options=options,
        config_options=config_options,
    )

    return out_fname
def _append_info(ds_paths, bnames, no_data, geoboxes, parent, name, obj):
    """
    Append the required info for the target dataset.
    """
    if obj.attrs.get("CLASS") == "IMAGE":
        no_data.append(obj.attrs.get("no_data_value"))
        vrt_path = PATH_FMT.format(basename(obj.file.filename), obj.name)
        ds_paths.append(vrt_path)
        geoboxes.append(GriddedGeoBox.from_dataset(obj))

        if parent:
            bnames.append(FMT.format(basename(obj.parent.name), name))
        else:
            bnames.append(name)
def contiguity(fname):
    """
    Compute a contiguity mask based on the intersection of valid
    data pixels across all bands from the input file, and return it
    along with the geobox of the source dataset.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)
        yblock, xblock = ds.block_shapes[0]
        ones = np.ones((ds.height, ds.width), dtype="uint8")
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    return ones, geobox
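# Hedged usage sketch (not from the source): the two contiguity variants
# above differ only in whether they write the mask themselves. The
# returning variant pairs naturally with write_img; the filenames below
# are hypothetical.
mask, mask_geobox = contiguity("/tmp/NBAR_BAND-2.tif")
write_img(mask, "/tmp/contiguity.tif", cogtif=True, levels=[2, 4, 8, 16, 32],
          geobox=mask_geobox, options={'compress': 'deflate', 'zlevel': 4})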
def convert_image(dataset, output_directory):
    """
    Converts a HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None, outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    if "no_data_value" in tags:
        no_data = tags.pop("no_data_value")
    else:
        no_data = None

    tags["history"] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        "driver": "GTiff",
        "geobox": geobox,
        "options": {"zlevel": 1, "compress": "deflate"},
        "tags": tags,
        "nodata": no_data,
    }

    base_fname = pjoin(output_directory, normpath(dataset.name.strip("/")))
    out_fname = "".join([base_fname, ".tif"])

    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(dataset, out_fname, **kwargs)

    out_fname = "".join([base_fname, ".yaml"])
    tags = {k: v for k, v in dataset.attrs.items()}
    with open(out_fname, "w") as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:

            dataset = h5group[pathname]

            if dataset.attrs['band_name'] == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor
                # isn't skipped
                continue

            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs
                   if a.band_name == dataset.attrs['band_name']][0]

            # base_dir = pjoin(splitext(basename(acq.pathname))[0], granule)
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir,
                              # base_dir.replace('L1C', 'ARD'),
                              # granule.replace('L1C', 'ARD'),
                              product,
                              fname.replace('L1C', 'ARD'))

            # output
            if not exists(dirname(out_fname)):
                os.makedirs(dirname(out_fname))

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags
def get_img_dataset_info(dataset, path, layer=1):
    """
    Returns metadata for raster datasets.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    return {
        'path': path,
        'layer': layer,
        'info': {
            'width': geobox.x_size(),
            'height': geobox.y_size(),
            'geotransform': list(geobox.transform.to_gdal())
        }
    }
def read_subset(fname, ul_xy, ur_xy, lr_xy, ll_xy, bands=1):
    """
    Return a 2D or 3D NumPy array subsetted to the given bounding
    extents.

    :param fname:
        A string containing the full file pathname to an image on
        disk, or an HDF5 Dataset (h5py.Dataset).

    :param ul_xy:
        A tuple containing the Upper Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ur_xy:
        A tuple containing the Upper Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param lr_xy:
        A tuple containing the Lower Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ll_xy:
        A tuple containing the Lower Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param bands:
        Can be an integer or list of integers representing the band(s)
        to be read from disk.  If bands is a list, then the returned
        subset will be 3D, otherwise the subset will be strictly 2D.

    :return:
        A tuple of 2 elements:

            * 1. 2D or 3D NumPy array containing the image subset.
            * 2. An instance of a GriddedGeoBox covering the subset.

    :additional notes:
        The ending array co-ordinates are increased by +1,
        i.e. xend = 270 + 1 to account for Python's
        [inclusive, exclusive) index notation.
    """
    if isinstance(fname, h5py.Dataset):
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.attrs['crs_wkt']
    else:
        # Open the file
        with rasterio.open(fname) as src:
            # Get the inverse transform of the affine co-ordinate reference
            geobox = GriddedGeoBox.from_dataset(src)
            prj = src.crs.wkt  # rasterio returns a unicode

    inv = ~geobox.transform
    rows, cols = geobox.shape

    # Convert each map co-ordinate to image/array co-ordinates
    img_ul_x, img_ul_y = [int(v) for v in inv * ul_xy]
    img_ur_x, img_ur_y = [int(v) for v in inv * ur_xy]
    img_lr_x, img_lr_y = [int(v) for v in inv * lr_xy]
    img_ll_x, img_ll_y = [int(v) for v in inv * ll_xy]

    # Calculate the min and max array extents
    # The ending array extents have +1 to account for Python's
    # [inclusive, exclusive) index notation.
    xstart = min(img_ul_x, img_ll_x)
    ystart = min(img_ul_y, img_ur_y)
    xend = max(img_ur_x, img_lr_x) + 1
    yend = max(img_ll_y, img_lr_y) + 1

    # Check for out of bounds
    if (((xstart < 0) or (ystart < 0)) or
            ((xend - 1 > cols) or (yend - 1 > rows))):
        msg = ("Error! Attempt to read a subset that is outside of the "
               "image domain. Index: ({ys}, {ye}), ({xs}, {xe}))")
        msg = msg.format(ys=ystart, ye=yend, xs=xstart, xe=xend)
        raise IndexError(msg)

    if isinstance(fname, h5py.Dataset):
        subs = fname[ystart:yend, xstart:xend]
    else:
        with rasterio.open(fname) as src:
            subs = src.read(bands, window=((ystart, yend), (xstart, xend)))

    # Get the new UL co-ordinates of the array
    ul_x, ul_y = geobox.transform * (xstart, ystart)

    geobox_subs = GriddedGeoBox(shape=subs.shape, origin=(ul_x, ul_y),
                                pixelsize=geobox.pixelsize, crs=prj)

    return (subs, geobox_subs)
def get_dsm(
    acquisition,
    pathname,
    buffer_distance=8000,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Given an acquisition and a national Digital Surface Model,
    extract a subset from the DSM based on the acquisition extents
    plus x & y margins. The subset is then smoothed with a 3x3
    gaussian filter. A square margin is applied to the extents.

    :param acquisition:
        An instance of an acquisition object.

    :param pathname:
        A string pathname of the DSM with a ':' to separate the
        filename from the HDF5 dataset name.

    :param buffer_distance:
        A number representing the desired distance (in the same
        units as the acquisition) in which to calculate the extra
        number of pixels required to buffer an image.
        Default is 8000.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset name will be as follows:

        * DatasetName.DSM_SMOOTHED

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # Use the 1st acquisition to setup the geobox
    geobox = acquisition.gridded_geo_box()
    shape = geobox.get_shape_yx()

    # buffered image extents/margins
    margins = pixel_buffer(acquisition, buffer_distance)

    # Get the dimensions and geobox of the new image
    dem_cols = shape[1] + margins.left + margins.right
    dem_rows = shape[0] + margins.top + margins.bottom
    dem_shape = (dem_rows, dem_cols)
    dem_origin = geobox.convert_coordinates(
        (0 - margins.left, 0 - margins.top))
    dem_geobox = GriddedGeoBox(
        dem_shape,
        origin=dem_origin,
        pixelsize=geobox.pixelsize,
        crs=geobox.crs.ExportToWkt(),
    )

    # split the DSM filename, dataset name, and load
    fname, dname = pathname.split(":")
    with h5py.File(fname, "r") as dsm_fid:
        dsm_ds = dsm_fid[dname]
        dsm_geobox = GriddedGeoBox.from_dataset(dsm_ds)

        # calculate full border extents into CRS of DSM
        extents = dem_geobox.project_extents(dsm_geobox.crs)
        ul_xy = (extents[0], extents[3])
        ur_xy = (extents[2], extents[3])
        lr_xy = (extents[2], extents[1])
        ll_xy = (extents[0], extents[1])

        # load the subset and corresponding geobox
        subs, subs_geobox = read_subset(
            dsm_ds, ul_xy, ur_xy, lr_xy, ll_xy, edge_buffer=1)

        # ancillary metadata tracking
        metadata = current_h5_metadata(dsm_fid, dataset_path=dname)

    # Retrieve the DSM data
    dsm_data = reproject_array_to_array(subs, subs_geobox, dem_geobox,
                                        resampling=Resampling.bilinear)

    # free memory
    subs = None

    # Output the reprojected result
    # Initialise the output files
    if out_group is None:
        fid = h5py.File("dsm-subset.h5", "w",
                        driver="core", backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if acquisition.tile_size[0] == 1:
        filter_opts["chunks"] = (1, dem_cols)
    else:
        # TODO: rework the tiling regime for larger dsm
        # for non single row based tiles, we won't have ideal
        # matching reads for tiled processing between the acquisition
        # and the DEM
        filter_opts["chunks"] = acquisition.tile_size

    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()

    group = fid.create_group(GroupName.ELEVATION_GROUP.value)

    param_grp = group.create_group("PARAMETERS")
    param_grp.attrs["left_buffer"] = margins.left
    param_grp.attrs["right_buffer"] = margins.right
    param_grp.attrs["top_buffer"] = margins.top
    param_grp.attrs["bottom_buffer"] = margins.bottom

    # dataset attributes
    attrs = {
        "crs_wkt": geobox.crs.ExportToWkt(),
        "geotransform": dem_geobox.transform.to_gdal(),
    }

    # Smooth the DSM
    dsm_data = filter_dsm(dsm_data)
    dname = DatasetName.DSM_SMOOTHED.value
    out_sm_dset = group.create_dataset(dname, data=dsm_data, **kwargs)
    desc = ("A subset of a Digital Surface Model smoothed with a "
            "gaussian kernel.")
    attrs["description"] = desc
    attrs["id"] = numpy.array([metadata["id"]], VLEN_STRING)
    attach_image_attributes(out_sm_dset, attrs)

    if out_group is None:
        return fid
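# Hedged sketch (pixel_buffer is referenced in get_dsm but not defined in
# this section): a per-edge margin in pixels can presumably be derived from
# the buffer distance and the acquisition's pixel size. All names here are
# assumptions for illustration.
import math
from collections import namedtuple

Margins = namedtuple("Margins", ["left", "right", "top", "bottom"])


def pixel_buffer_sketch(pixel_size_x, pixel_size_y, buffer_distance=8000):
    # a square buffer: the same distance on every edge, in whole pixels
    nx = int(math.ceil(buffer_distance / abs(pixel_size_x)))
    ny = int(math.ceil(buffer_distance / abs(pixel_size_y)))
    return Margins(left=nx, right=nx, top=ny, bottom=ny)


print(pixel_buffer_sketch(25.0, 25.0))
# Margins(left=320, right=320, top=320, bottom=320)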
def read_subset(fname, ul_xy, ur_xy, lr_xy, ll_xy, edge_buffer=0, bands=1):
    """
    Return a 2D or 3D NumPy array subsetted to the given bounding
    extents.

    The function will allow a user to ask for a region outside of the
    requested domain. Those elements that fall outside of the requested
    domain will be populated with the dataset's fillvalue, or 0 if the
    fillvalue is None.

    :param fname:
        A string containing the full file pathname to an image on
        disk. OR an HDF5 Dataset (h5py.Dataset).

    :param ul_xy:
        A tuple containing the Upper Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ur_xy:
        A tuple containing the Upper Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param lr_xy:
        A tuple containing the Lower Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ll_xy:
        A tuple containing the Lower Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param edge_buffer:
        An integer indicating the additional number of pixels to read
        along each edge of the subset. Useful for when additional data
        might be required, such as for reprojection.
        Default is 0 pixels on each edge.

    :param bands:
        Can be an integer or list of integers representing the band(s)
        to be read from disk.  If bands is a list, then the returned
        subset will be 3D, otherwise the subset will be strictly 2D.

    :return:
        A tuple of 2 elements:

            * 1. 2D or 3D NumPy array containing the requested region
            * 2. An instance of a GriddedGeoBox covering the requested
                 region

    :additional notes:
        The array dimensions are determined via the supplied ROI. As
        such, the returned array will use a fill value for the pixels
        falling outside of the dataset we're reading from.
    """
    if isinstance(fname, h5py.Dataset):
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.attrs['crs_wkt']
        dtype = fname.dtype
        fillv = fname.attrs.get('fillvalue')
    elif isinstance(fname, rasterio.io.DatasetReader):
        # Get the inverse transform of the affine co-ordinate reference
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.crs.wkt  # rasterio returns a unicode
        dtype = fname.dtypes[0]
        fillv = fname.nodata
    elif isinstance(fname, str):
        # Open the file
        with rasterio.open(fname) as src:
            # Get the inverse transform of the affine co-ordinate reference
            geobox = GriddedGeoBox.from_dataset(src)
            prj = src.crs.wkt  # rasterio returns a unicode
            dtype = src.dtypes[0]
            fillv = src.nodata
    else:
        raise ValueError(
            'Unexpected file description of type {}'.format(type(fname)))

    inv = ~geobox.transform
    rows, cols = geobox.shape

    # fillvalue will default to zero if None
    fillv = 0 if fillv is None else fillv

    # Convert each map co-ordinate to image/array co-ordinates
    img_ul_x, img_ul_y = [int(round(v)) for v in inv * ul_xy]
    img_ur_x, img_ur_y = [int(round(v)) for v in inv * ur_xy]
    img_lr_x, img_lr_y = [int(round(v)) for v in inv * lr_xy]
    img_ll_x, img_ll_y = [int(round(v)) for v in inv * ll_xy]

    # Calculate the min and max array extents including edge_buffer
    xstart = min(img_ul_x, img_ll_x) - edge_buffer
    ystart = min(img_ul_y, img_ur_y) - edge_buffer
    xend = max(img_ur_x, img_lr_x) + edge_buffer
    yend = max(img_ll_y, img_lr_y) + edge_buffer

    # initialise the output array
    dims = (yend - ystart, xend - xstart)
    subs = np.full(dims, fillv, dtype=dtype)

    # Get the new UL co-ordinates of the array
    ul_x, ul_y = geobox.transform * (xstart, ystart)

    geobox_subs = GriddedGeoBox(shape=subs.shape, origin=(ul_x, ul_y),
                                pixelsize=geobox.pixelsize, crs=prj)

    # test for intersection
    if not geobox_subs.intersects(geobox):
        raise IndexError("Requested Subset Does Not Intersect With Array")

    # intersected region (source index xy start and end coords)
    source_xs = max(0, xstart)
    source_ys = max(0, ystart)
    source_xe = min(cols, xend)
    source_ye = min(rows, yend)

    # source indices/slice
    source_idx = np.s_[source_ys:source_ye, source_xs:source_xe]

    # destination origin/start index (UL) coords -> abs(min(0, ul))
    dest_xs = abs(min(0, xstart))
    dest_ys = abs(min(0, ystart))

    # destination end (LR) -> (source_end - source_start) + dest_start
    dest_xe = (source_xe - source_xs) + dest_xs
    dest_ye = (source_ye - source_ys) + dest_ys

    # destination indices/slice
    dest_idx = np.s_[dest_ys:dest_ye, dest_xs:dest_xe]

    if isinstance(fname, h5py.Dataset):
        fname.read_direct(subs, source_idx, dest_idx)
    elif isinstance(fname, rasterio.io.DatasetReader):
        window = ((source_ys, source_ye), (source_xs, source_xe))
        fname.read(bands, window=window, out=subs[dest_idx])
    elif isinstance(fname, str):
        with rasterio.open(fname) as src:
            window = ((source_ys, source_ye), (source_xs, source_xe))
            src.read(bands, window=window, out=subs[dest_idx])
    else:
        raise ValueError(
            'Unexpected file description of type {}'.format(type(fname)))

    return (subs, geobox_subs)
def mndwi(wagl_h5_file, granule, out_fname):
    """
    Computes the mndwi for a given granule in a wagl h5 file.

    Parameters
    ----------
    wagl_h5_file : str
        wagl-water-atcor generated h5 file

    granule : str
        Group path of the granule within the h5 file

    out_fname : str
        Output filename of the h5 file
    """
    # specify the reflectance products to use in generating mndwi
    products = ["LMBADJ"]

    # specify the resampling approach for the SWIR band
    resample_approach = Resampling.bilinear

    h5_fid = h5py.File(out_fname, "w")

    # find the granule index in the wagl_h5_file
    fid = h5py.File(wagl_h5_file, "r")
    granule_fid = fid[granule]
    paths = find(granule_fid, "IMAGE")

    # get platform name
    md = yaml.load(fid[granule + "/METADATA/CURRENT"][()],
                   Loader=yaml.FullLoader)
    platform_id = md["source_datasets"]["platform_id"]

    # store mndwi-based products into a group
    mndwi_grp = h5_fid.create_group("mndwi")

    for prod in products:
        # search the h5 groups & get paths to the green and swir bands
        green_path, swir_path = get_mndwi_bands(granule, platform_id,
                                                prod, paths)

        green_ds = granule_fid[green_path]
        chunks = green_ds.chunks
        nRows, nCols = green_ds.shape
        geobox = GriddedGeoBox.from_dataset(green_ds)
        nodata = green_ds.attrs["no_data_value"]

        # create output h5 attributes
        desc = "MNDWI derived with {0} and {1} ({2} reflectances)".format(
            psplit(green_path)[-1],
            psplit(swir_path)[-1],
            prod,
        )

        attrs = {
            "crs_wkt": geobox.crs.ExportToWkt(),
            "geotransform": geobox.transform.to_gdal(),
            "no_data_value": nodata,
            "granule": granule,
            "description": desc,
            "platform": platform_id,
            "spatial_resolution": abs(geobox.transform.a),
        }

        if platform_id.startswith("SENTINEL_2"):
            # we need to upscale the swir band
            swir_ds = granule_fid[swir_path]
            swir_im = reproject_array_to_array(
                src_img=swir_ds[:],
                src_geobox=GriddedGeoBox.from_dataset(swir_ds),
                dst_geobox=geobox,
                src_nodata=swir_ds.attrs["no_data_value"],
                dst_nodata=nodata,
                resampling=resample_approach,
            )
            attrs["SWIR_resampling_method"] = resample_approach.name
        else:
            swir_im = granule_fid[swir_path][:]

        # ------------------------- #
        #  Compute mndwi via tiles  #
        #    and save tiles to h5   #
        # ------------------------- #
        # note: generate_tiles takes samples (columns) then lines (rows)
        tiles = generate_tiles(samples=nCols, lines=nRows,
                               xtile=chunks[1], ytile=chunks[0])

        # create mndwi dataset
        mndwi_ds = mndwi_grp.create_dataset(
            f"mndwi_image_{prod}",
            shape=(nRows, nCols),
            dtype="float32",
            compression="lzf",
            chunks=chunks,
            shuffle=True,
        )

        for tile in tiles:
            green_tile = green_ds[tile]
            swir_tile = swir_im[tile]
            mndwi_tile = compute_mndwi(green_tile, swir_tile)

            # perform masking
            mask = ((green_tile == nodata)
                    | (swir_tile == nodata)
                    | (~np.isfinite(mndwi_tile)))
            mndwi_tile[mask] = nodata

            mndwi_ds[tile] = mndwi_tile

        # add attrs to dataset
        attach_image_attributes(mndwi_ds, attrs)

    fid.close()
    h5_fid.close()
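# Hedged sketch of compute_mndwi, which mndwi() calls but which is not
# defined in this section. MNDWI is conventionally
# (green - swir) / (green + swir); the float32 cast and the errstate guard
# are assumptions for a clean, self-contained sketch.
import numpy as np


def compute_mndwi_sketch(green, swir):
    green = green.astype("float32")
    swir = swir.astype("float32")
    with np.errstate(divide="ignore", invalid="ignore"):
        # zero denominators yield inf/nan, which the caller masks to nodata
        return (green - swir) / (green + swir)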
def run(self):
    # Subdirectory in the task workdir
    workdir = pjoin(self.workdir, "gverify")
    if not exists(workdir):
        os.makedirs(workdir)

    # Get acquisition metadata, limit it to executing granule
    container = acquisitions(
        self.level1, self.acq_parser_hint).get_granule(self.granule,
                                                       container=True)
    acq_info = acquisition_info(container, self.granule)

    # Initialise output variables for error case
    error_msg = ""
    ref_date = ""
    ref_source_path = ""
    reference_resolution = ""

    try:
        # retrieve a set of matching landsat scenes
        # lookup is based on polygon for Sentinel-2
        landsat_scenes = acq_info.intersecting_landsat_scenes(
            self.landsat_scenes_shapefile)

        def fixed_extra_parameters():
            points_txt = pjoin(workdir, "points.txt")
            collect_gcp(self.root_fix_qa_location, landsat_scenes,
                        points_txt)
            return ["-t", "FIXED_LOCATION", "-t_file", points_txt]

        if acq_info.is_land_tile(self.ocean_tile_list):
            location = acq_info.land_band()
            # for sentinel-2 land tiles we prefer grid points
            # rather than GCPs
            if acq_info.preferred_gverify_method == "grid":
                extra = ["-g", self.grid_size]
            else:
                extra = fixed_extra_parameters()
        else:
            # for sea tiles we always pick GCPs
            location = acq_info.ocean_band()
            extra = fixed_extra_parameters()

        # Extract the source band from the results archive
        with h5py.File(self.input()[0].path, "r") as h5:
            band_id = h5[location].attrs["band_id"]
            source_band = pjoin(workdir,
                                "source-BAND-{}.tif".format(band_id))
            source_image = h5[location][:]
            source_image[source_image == -999] = 0
            write_img(
                source_image,
                source_band,
                geobox=GriddedGeoBox.from_dataset(h5[location]),
                nodata=0,
                options={"compress": "deflate", "zlevel": 1},
            )

        # returns a reference image from one of ls5/7/8
        # the gqa band id will differ depending on if the source image
        # is 5/7/8
        reference_imagery = get_reference_imagery(
            landsat_scenes,
            acq_info.timestamp,
            band_id,
            acq_info.tag,
            [self.reference_directory, self.backup_reference_directory],
        )

        ref_date = get_reference_date(
            basename(reference_imagery[0].filename), band_id, acq_info.tag)
        ref_source_path = reference_imagery[0].filename

        # reference resolution is required for the gqa calculation
        reference_resolution = [
            abs(x) for x in most_common(reference_imagery).resolution
        ]

        vrt_file = pjoin(workdir, "reference.vrt")
        build_vrt(reference_imagery, vrt_file, workdir)

        self._run_gverify(
            vrt_file,
            source_band,
            outdir=workdir,
            extra=extra,
            resampling=acq_info.preferred_resampling_method,
        )
    except (ValueError, FileNotFoundError, CommandError) as ve:
        error_msg = str(ve)
        TASK_LOGGER.error(
            task=self.get_task_family(),
            params=self.to_str_params(),
            level1=self.level1,
            exception="gverify was not executed because:\n {}".format(
                error_msg),
        )
    finally:
        # Write out runtime data to be processed by the gqa task
        run_args = {
            "executable": self.executable,
            "ref_resolution": reference_resolution,
            "ref_date": (ref_date.isoformat() if ref_date else ""),
            "ref_source_path": str(ref_source_path),
            "granule": str(self.granule),
            "error_msg": str(error_msg),
        }
        with self.output()["runtime_args"].open("w") as fd:
            write_yaml(run_args, fd)

        # if gverify failed to produce the .res file, write out a blank one
        if not exists(self.output()["results"].path):
            with self.output()["results"].open("w") as fd:
                pass
def incident_angles(
    satellite_solar_group,
    slope_aspect_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the incident angle and the azimuthal incident angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the solar zenith and
        solar azimuth datasets specified by the pathnames given by:

        * DatasetName.SOLAR_ZENITH
        * DatasetName.SOLAR_AZIMUTH

    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect
        datasets specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.INCIDENT
        * DatasetName.AZIMUTHAL_INCIDENT

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SOLAR_ZENITH.value
    solar_zenith_dataset = satellite_solar_group[dname]
    dname = DatasetName.SOLAR_AZIMUTH.value
    solar_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(solar_zenith_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("incident-angles.h5", "w",
                        driver="core", backing_store=False)
    else:
        fid = out_group

    if GroupName.INCIDENT_GROUP.value not in fid:
        fid.create_group(GroupName.INCIDENT_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.INCIDENT_GROUP.value]
    tile_size = solar_zenith_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    dataset_name = DatasetName.INCIDENT.value
    incident_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_INCIDENT.value
    azi_inc_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = "Contains the incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "incident"
    attach_image_attributes(incident_dset, attrs)

    desc = "Contains the azimuthal incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "azimuthal-incident"
    attach_image_attributes(azi_inc_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sol_zen = as_array(solar_zenith_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        sol_azi = as_array(solar_azimuth_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32, transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32, transpose=True)

        # Initialise the work arrays
        incident = numpy.zeros((ysize, xsize), dtype="float32")
        azi_incident = numpy.zeros((ysize, xsize), dtype="float32")

        # Process the current tile
        incident_angle(
            xsize,
            ysize,
            sol_zen,
            sol_azi,
            slope,
            aspect,
            incident.transpose(),
            azi_incident.transpose(),
        )

        # Write the current tile to disk
        incident_dset[idx] = incident
        azi_inc_dset[idx] = azi_incident

    if out_group is None:
        return fid
def relative_azimuth_slope(
    incident_angles_group,
    exiting_angles_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the relative azimuth angle on the slope surface.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the azimuthal incident
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the azimuthal exiting
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_EXITING

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.RELATIVE_SLOPE

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.AZIMUTHAL_INCIDENT.value
    azimuth_incident_dataset = incident_angles_group[dname]
    dname = DatasetName.AZIMUTHAL_EXITING.value
    azimuth_exiting_dataset = exiting_angles_group[dname]

    geobox = GriddedGeoBox.from_dataset(azimuth_incident_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("relative-azimuth-angles.h5", "w",
                        driver="core", backing_store=False)
    else:
        fid = out_group

    if GroupName.REL_SLP_GROUP.value not in fid:
        fid.create_group(GroupName.REL_SLP_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.REL_SLP_GROUP.value]
    tile_size = azimuth_incident_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    out_dset = grp.create_dataset(DatasetName.RELATIVE_SLOPE.value, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = ("Contains the relative azimuth angles on the slope surface in "
            "degrees.")
    attrs["description"] = desc
    attrs["alias"] = "relative-slope"
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        azi_inc = azimuth_incident_dataset[idx]
        azi_exi = azimuth_exiting_dataset[idx]

        # Process the tile
        rel_azi = azi_inc - azi_exi
        rel_azi[rel_azi <= -180.0] += 360.0
        rel_azi[rel_azi > 180.0] -= 360.0

        # Write the current tile to disk
        out_dset[idx] = rel_azi

    if out_group is None:
        return fid
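# Quick self-contained check (not from the source) of the wrap-around step
# above: differences of two azimuths in [0, 360) span (-360, 360), and the
# two conditional shifts fold every value into the (-180, 180] interval.
import numpy as np

rel_azi = np.array([-350.0, -180.0, -10.0, 181.0, 359.0], dtype="float32")
rel_azi[rel_azi <= -180.0] += 360.0
rel_azi[rel_azi > 180.0] -= 360.0
print(rel_azi)   # [  10.  180.  -10. -179.   -1.]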
def exiting_angles(satellite_solar_group, slope_aspect_group, out_group=None,
                   compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Calculates the exiting angle and the azimuthal exiting angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the satellite view and
        satellite azimuth datasets specified by the pathnames given by:

        * DatasetName.SATELLITE_VIEW
        * DatasetName.SATELLITE_AZIMUTH

    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect
        datasets specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.EXITING
        * DatasetName.AZIMUTHAL_EXITING

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SATELLITE_VIEW.value
    satellite_view_dataset = satellite_solar_group[dname]
    dname = DatasetName.SATELLITE_AZIMUTH.value
    satellite_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(satellite_view_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('exiting-angles.h5', 'w',
                        driver='core', backing_store=False)
    else:
        fid = out_group

    if GroupName.EXITING_GROUP.value not in fid:
        fid.create_group(GroupName.EXITING_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.EXITING_GROUP.value]
    tile_size = satellite_view_dataset.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = -999
    kwargs['shape'] = shape
    kwargs['fillvalue'] = no_data
    kwargs['dtype'] = 'float32'

    # output datasets
    dataset_name = DatasetName.EXITING.value
    exiting_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_EXITING.value
    azi_exit_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': crs,
        'geotransform': geobox.transform.to_gdal(),
        'no_data_value': no_data
    }
    desc = "Contains the exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'exiting'
    attach_image_attributes(exiting_dset, attrs)

    desc = "Contains the azimuthal exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'azimuthal-exiting'
    attach_image_attributes(azi_exit_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sat_view = as_array(satellite_view_dataset[idx],
                            dtype=numpy.float32, transpose=True)
        sat_azi = as_array(satellite_azimuth_dataset[idx],
                           dtype=numpy.float32, transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32, transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32, transpose=True)

        # Initialise the work arrays
        exiting = numpy.zeros((ysize, xsize), dtype='float32')
        azi_exiting = numpy.zeros((ysize, xsize), dtype='float32')

        # Process the current tile
        exiting_angle(xsize, ysize, sat_view, sat_azi, slope, aspect,
                      exiting.transpose(), azi_exiting.transpose())

        # Write the current tile to disk
        exiting_dset[idx] = exiting
        azi_exit_dset[idx] = azi_exiting

    if out_group is None:
        return fid
def combine_shadow_masks(self_shadow_group, cast_shadow_sun_group,
                         cast_shadow_satellite_group, out_group=None,
                         compression=H5CompressionFilter.LZF,
                         filter_opts=None):
    """
    A convenience function for combining the shadow masks into a
    single boolean array.

    :param self_shadow_group:
        The root HDF5 `Group` that contains the self shadow dataset
        specified by the pathname given by:

        * DatasetName.SELF_SHADOW

    :param cast_shadow_sun_group:
        The root HDF5 `Group` that contains the cast shadow
        (solar direction) dataset specified by the pathname
        given by:

        * DatasetName.CAST_SHADOW_FMT

    :param cast_shadow_satellite_group:
        The root HDF5 `Group` that contains the cast shadow
        (satellite direction) dataset specified by the pathname
        given by:

        * DatasetName.CAST_SHADOW_FMT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be given by the format string detailed
        by:

        * DatasetName.COMBINED_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # access the datasets
    dname_fmt = DatasetName.CAST_SHADOW_FMT.value
    self_shad = self_shadow_group[DatasetName.SELF_SHADOW.value]
    cast_sun = cast_shadow_sun_group[dname_fmt.format(source='SUN')]
    dname = dname_fmt.format(source='SATELLITE')
    cast_sat = cast_shadow_satellite_group[dname]
    geobox = GriddedGeoBox.from_dataset(self_shad)

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('combined-shadow.h5', 'w',
                        driver='core', backing_store=False)
    else:
        fid = out_group

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    grp = fid[GroupName.SHADOW_GROUP.value]
    tile_size = cast_sun.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    out_dset = grp.create_dataset(DatasetName.COMBINED_SHADOW.value,
                                  **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = ("Combined shadow masks: 1. self shadow, "
            "2. cast shadow (solar direction), "
            "3. cast shadow (satellite direction).")
    attrs['description'] = desc
    attrs['mask_values'] = "False = Shadow; True = Non Shadow"
    attrs['alias'] = 'terrain-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        out_dset[idx] = (self_shad[idx] & cast_sun[idx] & cast_sat[idx])

    if out_group is None:
        return fid
def self_shadow(incident_angles_group, exiting_angles_group, out_group=None,
                compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Computes the self shadow mask.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the incident
        angle dataset specified by the pathname given by:

        * DatasetName.INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the exiting
        angle dataset specified by the pathname given by:

        * DatasetName.EXITING

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset name will be given by:

        * DatasetName.SELF_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    incident_angle = incident_angles_group[DatasetName.INCIDENT.value]
    exiting_angle = exiting_angles_group[DatasetName.EXITING.value]
    geobox = GriddedGeoBox.from_dataset(incident_angle)

    # Initialise the output file
    if out_group is None:
        fid = h5py.File('self-shadow.h5', 'w',
                        driver='core', backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    grp = fid[GroupName.SHADOW_GROUP.value]

    tile_size = exiting_angle.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    dataset_name = DatasetName.SELF_SHADOW.value
    out_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = "Self shadow mask derived using the incident and exiting angles."
    attrs['description'] = desc
    attrs['alias'] = 'self-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        inc = numpy.radians(incident_angle[idx])
        exi = numpy.radians(exiting_angle[idx])

        # Process the tile; a pixel is self shadowed when either the
        # cosine of the incident or exiting angle is non-positive
        mask = numpy.ones(inc.shape, dtype='uint8')
        mask[numpy.cos(inc) <= 0.0] = 0
        mask[numpy.cos(exi) <= 0.0] = 0

        # Write the current tile to disk
        out_dset[idx] = mask

    if out_group is None:
        return fid
def image_residual(ref_fid, test_fid, pathname, out_fid,
                   compression=H5CompressionFilter.LZF, save_inputs=False,
                   filter_opts=None):
    """
    Undertake residual analysis for IMAGE CLASS Datasets.
    A histogram and a cumulative histogram of the residuals are
    calculated and recorded as TABLE CLASS Datasets.
    Any NaN's in IMAGE datasets will be handled automatically.

    :param ref_fid:
        A h5py file object (essentially the root Group), containing
        the reference data.

    :param test_fid:
        A h5py file object (essentially the root Group), containing
        the test data.

    :param pathname:
        A `str` containing the pathname to the IMAGE Dataset.

    :param out_fid:
        A h5py file object (essentially the root Group), opened for
        writing the output data.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param save_inputs:
        A `bool` indicating whether or not to save the input datasets
        used for evaluating the residuals alongside the results.
        Default is False.

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None; this routine returns None, which is essential for the
        HDF5 visit routine.
    """
    def evaluate(ref_dset, test_dset):
        """
        Evaluate the image residual.
        Caters for boolean types.
        TODO: geobox intersection if dimensions are different.
        TODO: handle no data values
        TODO: handle classification datasets
        TODO: handle bitwise datasets
        """
        if ref_dset.dtype.name == 'bool':
            result = numpy.logical_xor(ref_dset, test_dset).astype('uint8')
        else:
            result = ref_dset[:] - test_dset
        return result

    class_name = 'IMAGE'
    ref_dset = ref_fid[pathname]
    test_dset = test_fid[pathname]

    # ignore no data values for the time being
    residual = evaluate(ref_dset, test_dset)
    min_residual = numpy.nanmin(residual)
    max_residual = numpy.nanmax(residual)
    pct_difference = (residual != 0).sum() / residual.size * 100

    if filter_opts is None:
        fopts = {}
    else:
        fopts = filter_opts.copy()
    fopts['chunks'] = ref_dset.chunks

    geobox = GriddedGeoBox.from_dataset(ref_dset)

    # output residual
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal(),
        'description': 'Residual',
        'min_residual': min_residual,
        'max_residual': max_residual,
        'percent_difference': pct_difference
    }

    base_dname = pbasename(pathname)
    group_name = ref_dset.parent.name.strip('/')
    dname = ppjoin('RESULTS', class_name, 'RESIDUALS', group_name,
                   base_dname)
    write_h5_image(residual, dname, out_fid, compression, attrs, fopts)

    # residuals distribution
    h = distribution(residual)
    hist = h['histogram']

    attrs = {
        'description': 'Frequency distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax']
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('residuals_distribution', hist.dtype.name)])
    table = numpy.zeros(hist.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['residuals_distribution'] = hist

    # output
    del fopts['chunks']
    dname = ppjoin('RESULTS', class_name, 'FREQUENCY-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table, dname, out_fid, compression, attrs=attrs,
                   filter_opts=fopts)

    # cumulative distribution
    h = distribution(numpy.abs(residual))
    hist = h['histogram']
    cdf = numpy.cumsum(hist / hist.sum())

    attrs = {
        'description': 'Cumulative distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax'],
        '90th_percentile': h['loc'][numpy.searchsorted(cdf, 0.9)],
        '99th_percentile': h['loc'][numpy.searchsorted(cdf, 0.99)]
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('cumulative_distribution', cdf.dtype.name)])
    table = numpy.zeros(cdf.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['cumulative_distribution'] = cdf

    # output
    dname = ppjoin('RESULTS', class_name, 'CUMULATIVE-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table, dname, out_fid, compression=compression,
                   attrs=attrs, filter_opts=fopts)

    if save_inputs:
        # copy the reference data
        out_grp = out_fid.require_group(ppjoin('REFERENCE-DATA', group_name))
        ref_fid.copy(ref_dset, out_grp)

        # copy the test data
        out_grp = out_fid.require_group(ppjoin('TEST-DATA', group_name))
        test_fid.copy(test_dset, out_grp)
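# Small self-contained check (not from the source) of the percentile lookup
# used above: numpy.searchsorted on a cumulative distribution returns the
# first bin whose cumulative share reaches the requested quantile.
import numpy as np

hist = np.array([8, 4, 2, 1, 1])            # residual magnitude histogram
loc = np.array([0.0, 1.0, 2.0, 3.0, 4.0])   # bin locations
cdf = np.cumsum(hist / hist.sum())          # [0.5, 0.75, 0.875, 0.9375, 1.0]
print(loc[np.searchsorted(cdf, 0.9)])       # 3.0 -> 90% of residuals <= 3
print(loc[np.searchsorted(cdf, 0.99)])      # 4.0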
def run(self):
    temp_directory = pjoin(self.workdir, 'work')
    if not exists(temp_directory):
        os.makedirs(temp_directory)

    temp_yaml = pjoin(temp_directory,
                      self.output_yaml.format(granule=self.granule))

    try:
        land = is_land_tile(self.granule, self.ocean_tile_list)
        if land:
            location = "{}/{}".format(self.granule, self.land_band)
        else:
            location = "{}/{}".format(self.granule, self.ocean_band)

        h5 = h5py.File(self.input()[0].path, 'r')
        geobox = GriddedGeoBox.from_dataset(h5[location])
        landsat_scenes = intersecting_landsat_scenes(
            geobox_to_polygon(geobox), self.landsat_scenes_shapefile)
        timestamp = acquisition_timestamp(h5, self.granule)
        band_id = h5[location].attrs['band_id']
        # TODO landsat sat_id
        sat_id = 's2'
        references = reference_imagery(
            landsat_scenes, timestamp, band_id, sat_id,
            [self.reference_directory, self.backup_reference])

        _LOG.debug("granule %s found reference images %s", self.granule,
                   [ref.filename for ref in references])
        vrt_file = pjoin(temp_directory, 'reference.vrt')
        build_vrt(references, vrt_file, temp_directory)

        source_band = pjoin(temp_directory, 'source.tif')
        source_image = h5[location][:]
        source_image[source_image == -999] = 0
        write_img(source_image, source_band, geobox=geobox, nodata=0,
                  options={'compress': 'deflate', 'zlevel': 1})

        if land:
            extra = ['-g', self.gverify_grid_size]
            cmd = gverify_cmd(self, vrt_file, source_band,
                              temp_directory, extra=extra)
            _LOG.debug('calling gverify %s', ' '.join(cmd))
            run_command(cmd, temp_directory, timeout=self.gverify_timeout)
        else:
            # create a set of fix-points from landsat path-row
            points_txt = pjoin(temp_directory, 'points.txt')
            collect_gcp(self.gverify_root_fix_qa_location, landsat_scenes,
                        points_txt)

            extra = ['-t', 'FIXED_LOCATION', '-t_file', points_txt]
            cmd = gverify_cmd(self, vrt_file, source_band,
                              temp_directory, extra=extra)
            _LOG.debug('calling gverify %s', ' '.join(cmd))
            run_command(cmd, temp_directory, timeout=self.gverify_timeout)

        _LOG.debug('finished gverify on %s', self.granule)
        parse_gqa(self, temp_yaml, references, band_id, sat_id,
                  temp_directory)

    except (ValueError, FileNotFoundError, CommandError) as ve:
        _LOG.debug('failed because GQA cannot be calculated: %s', str(ve))
        _write_failure_yaml(
            temp_yaml, self.granule, str(ve),
            gverify_version=self.gverify_binary.split('_')[-1])
        with open(pjoin(temp_directory, 'gverify.log'), 'w') as src:
            src.write('gverify was not executed because:\n')
            src.write(str(ve))

    self.output().makedirs()
    shutil.copy(temp_yaml, self.output().path)

    temp_log = glob.glob(pjoin(temp_directory, '*gverify.log'))[0]
    shutil.copy(temp_log, pjoin(self.workdir, basename(temp_log)))

    if int(self.cleanup):
        _cleanup_workspace(temp_directory)