def test_utm_to_lonlat(self):
    """
    Test that coordinates in one CRS are correctly transformed from
    utm into lonlat.

    This came about with GDAL3 and Proj6, where the native axis
    mapping of a CRS is respected, meaning that an x,y input and
    output is dependent on the CRS axis.
    We'll instead enforce the x,y axis mapping strategy.
    """
    shape = (3, 2)
    origin = (669700, 6111700)
    ggb = GriddedGeoBox(shape, origin, crs="EPSG:32755")

    # input position in the geobox CRS (EPSG:32755) and the lon/lat
    # pair the transform is expected to produce for it
    easting = 669717.105361586
    northing = 6111722.038508673
    expected_lon = 148.862561
    expected_lat = -35.123064

    to_crs = osr.SpatialReference()
    to_crs.ImportFromEPSG(4326)

    lon, lat = ggb.transform_coordinates((easting, northing), to_crs)

    # x (longitude) must come back first regardless of the CRS axis order
    self.assertAlmostEqual(lon, expected_lon)
    self.assertAlmostEqual(lat, expected_lat)
def test_y_size(self):
    """y_size() should equal the first element of the shape passed in."""
    shape = (3, 2)
    origin = (150.0, -34.0)
    ggb = GriddedGeoBox(shape, origin)
    self.assertEqual(shape[0], ggb.y_size())
def test_get_shape_xy(self):
    """get_shape_xy() should return the construction shape in (x, y) order."""
    shape = (3, 2)
    # the same shape with its two elements swapped
    shape_xy = (2, 3)
    origin = (150.0, -34.0)
    ggb = GriddedGeoBox(shape, origin)
    self.assertEqual(shape_xy, ggb.get_shape_xy())
def test_corner_create_unit_GGB_using_corners(self):
    """A GGB built with from_corners should report the corner it was given."""
    # create small GGB centred on (150.00025,-34.00025)
    step = 0.00025
    start = (150.0, -34.0)
    far_corner = (150.0 + step, -34.0 - step)

    unit_box = GriddedGeoBox.from_corners(start, far_corner)

    self.assertEqual(far_corner, unit_box.corner)
def unpack_dataset(product_group, product_name, band):
    """
    Write the mean image and the valid-pixel count image for one band.

    `outdir`, `mask` and `options` are not parameters; they are taken
    from the surrounding scope.

    :param product_group:
        Group-like object indexed by band key; also handed to
        `sum_and_count`.

    :param product_name:
        Product label used as the prefix of both output filenames.

    :param band:
        Key of the dataset within `product_group`.
    """
    source = product_group[band]

    # human readable band name
    band_name = source.attrs["alias"]
    no_data = source.attrs.get("no_data_value")
    box = GriddedGeoBox.from_dataset(source)

    mean_path = pjoin(outdir, "{}_{}.tif".format(product_name, band_name))
    count_path = pjoin(
        outdir, "{}_{}_valid_pixel_count.tif".format(product_name, band_name)
    )

    total, valid = sum_and_count(product_group, mask, band_name)

    # calculate the mean from sum and count; pixels with no valid
    # observations receive the nodata value before the int16 cast
    average = total / valid
    average[valid == 0] = no_data
    average = average.astype("int16")

    shared = {"geobox": box, "options": options}
    write_img(average, mean_path, nodata=no_data, **shared)
    write_img(valid, count_path, nodata=0, **shared)
def _write_cogtif(dataset, out_fname):
    """
    Easy wrapper for writing a cogtif, that takes care of datasets
    that are written row by row rather than square(ish) blocks.

    :param dataset:
        Dataset exposing `chunks`, `shape` and `attrs`, and accepted by
        `GriddedGeoBox.from_dataset`.

    :param out_fname:
        Destination path of the tif. A missing parent directory is
        created; a bare filename (no directory part) is accepted.

    :return:
        None; the image is written to disk.
    """
    if dataset.chunks[1] == dataset.shape[1]:
        # chunks span whole rows: read everything into memory and let
        # the writer tile the output at 512 x 512
        blockxsize = 512
        blockysize = 512
        data = dataset[:]
    else:
        # reuse the dataset's own chunking for the tif blocks
        blockysize, blockxsize = dataset.chunks
        data = dataset

    options = {
        'blockxsize': blockxsize,
        'blockysize': blockysize,
        'compress': 'deflate',
        'zlevel': 4
    }

    nodata = dataset.attrs.get('no_data_value')
    geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence; skip when out_fname has no directory component
    # (makedirs('') raises) and tolerate a concurrent creation
    out_dir = dirname(out_fname)
    if out_dir and not exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    write_img(data, out_fname, cogtif=True, levels=LEVELS, nodata=nodata,
              geobox=geobox, resampling=Resampling.nearest, options=options)
def get_land_sea_mask(
        gridded_geo_box,
        ancillary_path='/g/data/v10/eoancillarydata/Land_Sea_Rasters'):
    """
    Return a 2D land/sea numpy array for the supplied GriddedGeoBox,
    read from the per-UTM-zone rasters found under `ancillary_path`.

    The array is allocated as uint8 with the shape of
    `gridded_geo_box`; per the original description Land = True
    (non-zero) and Sea = False (zero).

    NOTE(review): the original notes state that a geobox with a non-UTM
    CRS or a non-native sample frequency is reprojected/resampled into
    the gridded_geo_box; only a windowed read into the geobox-shaped
    array is visible here -- confirm.

    :param gridded_geo_box:
        The GriddedGeoBox describing the area of interest.

    :param ancillary_path:
        Directory holding the `WORLDzone<N>.tif` rasters.

    :return:
        A 2D numpy array of dtype uint8.
    """
    # geographic position (EPSG:4326) of the geobox origin
    geographic = osr.SpatialReference()
    geographic.SetFromUserInput('EPSG:4326')
    origin_lonlat = gridded_geo_box.transform_coordinates(
        gridded_geo_box.origin, geographic)

    # the raster file is selected by the absolute UTM zone number
    zone = abs(get_utm_zone(origin_lonlat))
    raster_path = '%s/WORLDzone%d.tif' % (ancillary_path, zone)

    mask = numpy.zeros(gridded_geo_box.shape, dtype=numpy.uint8)

    with rio.open(raster_path) as src:
        # geobox of the full raster, used to locate the requested subset
        full_extent = GriddedGeoBox.from_dataset(src)
        src.read(1, window=full_extent.window(gridded_geo_box), out=mask)

    return mask
def test_create_corner(self):
    """The corner should lie shape * 0.00025 away from the origin."""
    step = 0.00025
    rows, cols = 3, 2
    start = (150.0, -34.0)
    # x grows by the second shape element, y shrinks by the first
    expected = (cols * step + start[0], start[1] - rows * step)

    box = GriddedGeoBox((rows, cols), start)

    self.assertEqual(expected, box.corner)
def calculate_average(dataframe):
    """
    Given a dataframe with the columns:

        * filename
        * band_name
        * dataset_name (read from the final record only, to locate the
          dataset that supplies the geobox and chunks)

    Calculate the 3D/timeseries average from all input records.
    Each 2D dataset has dimensions (73y, 144x), and type float32.

    :param dataframe:
        One record per input dataset.

    :return:
        A tuple `(mean, geobox, chunks)`: the nan-aware mean over all
        records, and the geobox and chunks taken from the final record.

    :raises ValueError:
        If `dataframe` contains no records.
    """
    n_records = dataframe.shape[0]
    if n_records == 0:
        # without this guard the function fails later on an unbound `row`
        raise ValueError("calculate_average requires at least one record")

    data = numpy.zeros((n_records, 73, 144), dtype="float32")

    # load all data into 3D array (dims are small so just read all)
    for i, (_, row) in enumerate(dataframe.iterrows()):
        with h5py.File(row.filename, "r") as fid:
            ds = fid[row.band_name]
            ds.read_direct(data[i])
            no_data = float(ds.attrs['missing_value'])

        # check for nodata and convert to nan
        # do this for each dataset in case the nodata value changes
        data[i][data[i] == no_data] = numpy.nan

    # get the geobox, chunks; `row` still refers to the final record
    with h5py.File(row.filename, "r") as fid:
        ds = fid[row.dataset_name]
        geobox = GriddedGeoBox.from_dataset(ds)
        chunks = ds.chunks

    mean = numpy.nanmean(data, axis=0)

    return mean, geobox, chunks
def contiguity(fname, output):
    """
    Write a contiguity mask file based on the intersection of valid
    data pixels (value > 0) across all bands of the input file.

    :param fname:
        Path of the raster to inspect.

    :param output:
        Path the mask is written to.

    :return:
        None; the mask is written to disk.
    """
    with rasterio.open(fname) as src:
        geobox = GriddedGeoBox.from_dataset(src)
        block_rows, block_cols = src.block_shapes[0]

        # start fully valid and knock out pixels that fail in any band
        mask = np.ones((src.height, src.width), dtype='uint8')
        for index in src.indexes:
            valid = src.read(index) > 0
            mask &= valid

    # the output blocks mirror the block shape of the first input band
    creation_options = {
        'compress': 'deflate',
        'zlevel': 4,
        'blockxsize': block_cols,
        'blockysize': block_rows
    }

    write_img(
        mask,
        output,
        cogtif=True,
        levels=[2, 4, 8, 16, 32],
        geobox=geobox,
        options=creation_options,
    )
def test_real_world_shape(self):
    """from_corners over the Flinders Islet extent should give (19, 17)."""
    # Flinders Islet, NSW
    islet_origin = (150.927659, -34.453309)
    islet_corner = (150.931697, -34.457915)
    expected_shape = (19, 17)

    box = GriddedGeoBox.from_corners(islet_origin, islet_corner)

    self.assertEqual(expected_shape, box.shape)
def test_real_world_origin_lat(self):
    """The second origin element should survive from_corners unchanged."""
    # Flinders Islet, NSW
    islet_origin = (150.927659, -34.453309)
    islet_corner = (150.931697, -34.457915)

    box = GriddedGeoBox.from_corners(islet_origin, islet_corner)

    self.assertAlmostEqual(islet_origin[1], box.origin[1])
def test_shape_create_unit_GGB_using_corners(self):
    """Corners one 0.00025 step apart should produce a (1, 1) shape."""
    # create small GGB centred on (150.00025,-34.00025)
    step = 0.00025
    start = (150.0, -34.0)
    far_corner = (150.0 + step, -34.0 - step)

    unit_box = GriddedGeoBox.from_corners(start, far_corner)

    self.assertEqual((1, 1), unit_box.shape)
def test_ggb_crs_from_h5_dataset(self):
    """The CRS rebuilt from HDF5 dataset attributes should match the source."""
    img, geobox = ut.create_test_image()
    expected_wkt = geobox.crs.ExportToWkt()

    # in-memory HDF5 file; backing_store=False keeps it off disk
    with h5py.File("tmp.h5", "w", driver="core", backing_store=False) as fid:
        ds = fid.create_dataset("test", data=img)
        ds.attrs["geotransform"] = geobox.transform.to_gdal()
        ds.attrs["crs_wkt"] = expected_wkt

        new_geobox = GriddedGeoBox.from_h5_dataset(ds)

        # assertEqual reports both WKT strings when they differ
        self.assertEqual(new_geobox.crs.ExportToWkt(), expected_wkt)
def test_ggb_shape_from_h5_dataset(self):
    """The shape rebuilt from an HDF5 dataset should match the image."""
    img, geobox = ut.create_test_image()

    # open explicitly for writing: h5py 3 defaults to read-only, which
    # cannot create the in-memory file; backing_store=False keeps it
    # off disk
    with h5py.File('tmp.h5', 'w', driver='core', backing_store=False) as fid:
        ds = fid.create_dataset('test', data=img)
        ds.attrs['geotransform'] = geobox.transform.to_gdal()
        ds.attrs['crs_wkt'] = geobox.crs.ExportToWkt()

        new_geobox = GriddedGeoBox.from_h5_dataset(ds)

        # assertEqual reports both shapes when they differ
        self.assertEqual(new_geobox.shape, img.shape)
def test_real_world_origin_lon(self):
    """from_corners should keep the first origin element (and the shape)."""
    # Flinders Islet, NSW
    islet_origin = (150.927659, -34.453309)
    islet_corner = (150.931697, -34.457915)
    expected_shape = (19, 17)

    box = GriddedGeoBox.from_corners(islet_origin, islet_corner)

    self.assertEqual(expected_shape, box.shape)
    self.assertAlmostEqual(islet_origin[0], box.origin[0])
def test_ggb_shape_from_gdal_dataset(self):
    """The shape rebuilt from a GDAL dataset should match the image."""
    img, geobox = ut.create_test_image()

    # in-memory dataset: x size, y size, one band, data type code 1
    drv = gdal.GetDriverByName('MEM')
    ds = drv.Create('tmp.tif', img.shape[1], img.shape[0], 1, 1)
    ds.SetGeoTransform(geobox.transform.to_gdal())
    ds.SetProjection(geobox.crs.ExportToWkt())

    new_geobox = GriddedGeoBox.from_gdal_dataset(ds)

    # assertEqual reports both shapes when they differ
    self.assertEqual(new_geobox.shape, img.shape)

    # drop the references to the GDAL driver and dataset
    drv = None
    ds = None
def test_real_world_corner_lat(self):
    """The second corner element should be the origin less 19 steps of 0.00025."""
    # Flinders Islet, NSW
    flindersOrigin = (150.927659, -34.453309)
    flindersCorner = (150.931697, -34.457915)
    shapeShouldBe = (19, 17)

    # expected corner: origin offset by shape * 0.00025 along each axis
    cornerShouldBe = (
        flindersOrigin[0] + shapeShouldBe[1] * 0.00025,
        flindersOrigin[1] - shapeShouldBe[0] * 0.00025,
    )

    ggb = GriddedGeoBox.from_corners(flindersOrigin, flindersCorner)

    self.assertAlmostEqual(cornerShouldBe[1], ggb.corner[1])
def write_tif_from_dataset(dataset, out_fname, options, config_options,
                           overviews=True, nodata=None, geobox=None):
    """
    Method to write a h5 dataset or numpy array to a tif file

    :param dataset:
        h5 dataset containing a numpy array or numpy array
        Dataset will map to the raster data

    :param out_fname:
        destination of the tif; a missing parent directory is created

    :param options:
        dictionary of options provided to gdal

    :param config_options:
        dictionary of configurations provided to gdal

    :param overviews:
        boolean flag to create overviews default (True); when False
        no overview levels are passed to the writer

    :param nodata:
        nodata value for the output; when None it is looked up in the
        dataset's `no_data_value` attribute, if the dataset has attrs

    :param geobox:
        geobox for the output; when None it is derived from `dataset`

    returns the out_fname param
    """
    # objects exposing `chunks` are read fully into memory first
    if hasattr(dataset, "chunks"):
        data = dataset[:]
    else:
        data = dataset

    if nodata is None and hasattr(dataset, "attrs"):
        nodata = dataset.attrs.get("no_data_value")

    if geobox is None:
        geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence; skip when out_fname has no directory component
    # (makedirs('') raises) and tolerate a concurrent creation
    out_dir = dirname(out_fname)
    if out_dir and not exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    write_kwargs = {
        "nodata": nodata,
        "geobox": geobox,
        "resampling": Resampling.average,
        "options": options,
        "config_options": config_options,
    }
    # honour the documented flag: request overview levels only if asked
    if overviews:
        write_kwargs["levels"] = LEVELS

    write_img(data, out_fname, **write_kwargs)

    return out_fname
def contiguity(fname):
    """
    Compute a contiguity mask based on the intersection of valid data
    pixels (value > 0) across all bands of the input file.

    :param fname:
        Path of the raster to inspect.

    :return:
        A tuple `(mask, geobox)`: the uint8 mask array and the geobox
        of the source dataset.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)

        # start fully valid and knock out pixels that fail in any band
        ones = np.ones((ds.height, ds.width), dtype="uint8")
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    return ones, geobox
def _append_info(ds_paths, bnames, no_data, geoboxes, parent, name, obj): """ Append the required info for the target dataset. """ if obj.attrs.get("CLASS") == "IMAGE": no_data.append(obj.attrs.get("no_data_value")) vrt_path = PATH_FMT.format(basename(obj.file.filename), obj.name) ds_paths.append(vrt_path) geoboxes.append(GriddedGeoBox.from_dataset(obj)) if parent: bnames.append(FMT.format(basename(obj.parent.name), name)) else: bnames.append(name)
def convert_image(dataset, output_directory):
    """
    Converts a HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None, outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)

    # attributes listed in IGNORE are not written as tags; the nodata
    # value is passed to the writer separately rather than as a tag
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    no_data = tags.pop('no_data_value', None)
    tags['history'] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        'driver': 'GTiff',
        'geobox': geobox,
        'options': {
            'zlevel': 1,
            'compress': 'deflate'
        },
        'tags': tags,
        'nodata': no_data
    }

    # output files mirror the dataset's path inside the HDF5 file
    base_fname = pjoin(output_directory, normpath(dataset.name.strip('/')))

    out_fname = ''.join([base_fname, '.tif'])

    # skip when there is no directory component (makedirs('') raises)
    # and tolerate a concurrent creation
    out_dir = dirname(out_fname)
    if out_dir and not exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    write_img(dataset, out_fname, **kwargs)

    # the yaml sidecar carries every attribute, including ignored ones
    out_fname = ''.join([base_fname, '.yaml'])
    tags = dict(dataset.attrs.items())
    with open(out_fname, 'w') as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
def convert_image(dataset, output_directory):
    """
    Converts a HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None, outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)

    # attributes listed in IGNORE are not written as tags; the nodata
    # value is passed to the writer separately rather than as a tag
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    no_data = tags.pop("no_data_value", None)
    tags["history"] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        "driver": "GTiff",
        "geobox": geobox,
        "options": {
            "zlevel": 1,
            "compress": "deflate"
        },
        "tags": tags,
        "nodata": no_data,
    }

    # output files mirror the dataset's path inside the HDF5 file
    base_fname = pjoin(output_directory, normpath(dataset.name.strip("/")))

    out_fname = "".join([base_fname, ".tif"])

    # skip when there is no directory component (makedirs('') raises)
    # and tolerate a concurrent creation
    out_dir = dirname(out_fname)
    if out_dir and not exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    write_img(dataset, out_fname, **kwargs)

    # the yaml sidecar carries every attribute, including ignored ones
    out_fname = "".join([base_fname, ".yaml"])
    tags = dict(dataset.attrs.items())
    with open(out_fname, "w") as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
def test_lonlat_to_utm(self):
    """
    Check that a lon/lat pair is transformed into the expected
    easting/northing in EPSG:32755.

    Since GDAL3 and Proj6 the native axis mapping of a CRS is
    respected, so the order of an x,y input and output depends on the
    CRS axis. The x,y axis mapping strategy is enforced instead, and
    this test pins that behaviour.
    """
    box = GriddedGeoBox((3, 2), (150.0, -34.0))
    lonlat = (148.862561, -35.123064)

    utm = osr.SpatialReference()
    utm.ImportFromEPSG(32755)

    easting, northing = box.transform_coordinates(lonlat, utm)

    self.assertAlmostEqual(easting, 669717.105361586)
    self.assertAlmostEqual(northing, 6111722.038508673)
def get_img_dataset_info(dataset, path, layer=1):
    """
    Returns metadata for raster datasets

    :param dataset: dataset accepted by `GriddedGeoBox.from_dataset`.
    :param path: value stored under the 'path' key.
    :param layer: value stored under the 'layer' key (default 1).
    :return: dict with 'path', 'layer' and an 'info' dict holding
        'width', 'height' and 'geotransform' (a list).
    """
    box = GriddedGeoBox.from_dataset(dataset)

    info = {
        'width': box.x_size(),
        'height': box.y_size(),
        'geotransform': list(box.transform.to_gdal())
    }

    return {'path': path, 'layer': layer, 'info': info}
def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.

    Every `IMAGE` dataset belonging to a product listed in `PRODUCTS`
    is written under `outdir/<product>/` as a cogtif; datasets whose
    `band_name` attribute is 'BAND-9' are skipped.

    :param scene:
        Object providing `get_acquisitions(group=..., granule=...)`.

    :param granule:
        Granule identifier forwarded to `scene.get_acquisitions`.

    :param h5group:
        Group-like object searched for `IMAGE` and `SCALAR` datasets.

    :param outdir:
        Root directory of the unpacked products.

    :return:
        The parsed content of the first `SCALAR` dataset whose path
        contains 'NBAR-METADATA'.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:

            dataset = h5group[pathname]
            band_name = dataset.attrs['band_name']
            if band_name == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor isn't skipped
                continue

            # the first path component names the acquisition group
            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs if a.band_name == band_name][0]

            # rebuild the output filename from the acquisition's name
            # with the product name spliced in
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir, product, fname.replace('L1C', 'ARD'))

            # output; skip when there is no directory component
            # (makedirs('') raises) and tolerate a concurrent creation
            out_dir = dirname(out_fname)
            if out_dir and not exists(out_dir):
                os.makedirs(out_dir, exist_ok=True)

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    metadata_path = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]

    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input and is rejected outright by PyYAML >= 6; consider
    # yaml.safe_load once the metadata is confirmed to hold plain YAML.
    tags = yaml.load(h5group[metadata_path][()])
    return tags
def get_img_dataset_info(dataset, path, layer=1):
    """
    Returns metadata for raster datasets

    :param dataset: dataset accepted by `GriddedGeoBox.from_dataset`.
    :param path: value stored under the "path" key.
    :param layer: value stored under the "layer" key (default 1).
    :return: dict with "path", "layer" and an "info" dict holding
        "width", "height" and "geotransform" (a list).
    """
    box = GriddedGeoBox.from_dataset(dataset)

    info = {
        "width": box.x_size(),
        "height": box.y_size(),
        "geotransform": list(box.transform.to_gdal()),
    }

    return {"path": path, "layer": layer, "info": info}
def test_ggb_shape_from_rio_dataset(self):
    """The shape rebuilt from a rasterio dataset should match the image."""
    img, geobox = ut.create_test_image()

    # single-band dataset on the MEM driver, georeferenced like the
    # test image
    kwargs = {
        "driver": "MEM",
        "width": img.shape[1],
        "height": img.shape[0],
        "count": 1,
        "transform": geobox.transform,
        "crs": geobox.crs.ExportToWkt(),
        "dtype": img.dtype.name,
    }

    with rio.open("tmp.tif", "w", **kwargs) as ds:
        new_geobox = GriddedGeoBox.from_rio_dataset(ds)

        # assertEqual reports both shapes when they differ
        self.assertEqual(new_geobox.shape, img.shape)
def test_ggb_shape_from_rio_dataset(self):
    """The shape rebuilt from a rasterio dataset should match the image."""
    img, geobox = ut.create_test_image()

    # single-band dataset on the MEM driver, georeferenced like the
    # test image
    kwargs = {
        'driver': 'MEM',
        'width': img.shape[1],
        'height': img.shape[0],
        'count': 1,
        'transform': geobox.transform,
        'crs': geobox.crs.ExportToWkt(),
        'dtype': img.dtype.name
    }

    with rio.open('tmp.tif', 'w', **kwargs) as ds:
        new_geobox = GriddedGeoBox.from_rio_dataset(ds)

        # assertEqual reports both shapes when they differ
        self.assertEqual(new_geobox.shape, img.shape)
def convert_format(self, dataset_name, group, attrs=None,
                   compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Convert the HDF file to a HDF5 dataset.

    :param dataset_name:
        Name of the dataset to write.

    :param group:
        Destination passed through to `write_h5_image`.

    :param attrs:
        Optional mapping of attributes to attach to the dataset. It is
        copied, so the caller's mapping is not modified; the keys
        'description', 'crs_wkt' and 'geotransform' are set on the copy.

    :param compression:
        Compression filter passed through to `write_h5_image`
        (default `H5CompressionFilter.LZF`).

    :param filter_opts:
        Filter options passed through to `write_h5_image`.

    :return:
        None; the dataset is written via `write_h5_image`.
    """
    # work on a copy so the caller's mapping is never mutated
    attrs = {} if attrs is None else dict(attrs)

    # Get the UL corner of the UL pixel co-ordinate
    ul_lon = self.ul[0]
    ul_lat = self.ul[1]

    # pixel size x & y
    pixsz_x = self.delta_lon
    pixsz_y = self.delta_lat

    # Setup the projection; assuming Geographics WGS84
    # (Tests have shown that this appears to be the case)
    # (unfortunately it is not explicitly defined in the HDF file)
    sr = osr.SpatialReference()
    sr.SetWellKnownGeogCS("WGS84")
    prj = sr.ExportToWkt()

    # Setup the geobox; the pixel size is passed as absolute values
    dims = self.data[0].shape
    res = (abs(pixsz_x), abs(pixsz_y))
    geobox = GriddedGeoBox(shape=dims, origin=(ul_lon, ul_lat),
                           pixelsize=res, crs=prj)

    # Write the dataset
    attrs['description'] = 'Converted BRDF data from H4 to H5.'
    attrs['crs_wkt'] = prj
    attrs['geotransform'] = geobox.transform.to_gdal()
    write_h5_image(self.data[0], dataset_name, group, compression, attrs,
                   filter_opts)