def run_check(params, status):
    """Check that every pixel value used in a raster layer is an allowed code.

    Args:
        params: Check parameters. ``params["validcodes"]`` is a list whose
            items are either a single code or a two-item ``[first, last]``
            range with both ends included.
        status: Check status object; ``status.failed()`` is called once for
            each layer that contains pixels with a code outside validcodes.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    # Expand the validcodes parameter once into a set, so the per-layer
    # membership tests below do not rescan a list for every used code.
    valid_codes = set()
    for validcode_item in params["validcodes"]:
        if isinstance(validcode_item, (list, tuple)):
            valid_codes.update(range(validcode_item[0], validcode_item[1] + 1))
        else:
            valid_codes.add(validcode_item)

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # The position of a bucket in the histogram is used as the pixel code.
        # NOTE(review): this presumably relies on GDAL's default histogram
        # range (one bucket per byte value) - confirm for non-byte rasters.
        ds_band = ds.GetRasterBand(1)
        counts = ds_band.GetHistogram(approx_ok=False)

        # Codes with a non-zero pixel count which are not in the valid set,
        # reported in ascending order.
        invalid_codes = [str(code) for code, count in enumerate(counts)
                         if count != 0 and code not in valid_codes]
        if invalid_codes:
            invalid_codes_str = ', '.join(invalid_codes)
            status.failed("Layer {:s} has pixels with invalid values: {:s}."
                          .format(layer_def["src_layer_name"], invalid_codes_str))
def run_check(params, status):
    """Check that each raster layer uses one of the allowed compression types.

    Args:
        params: Check parameters; ``params["compression"]`` lists the allowed
            compression type names (compared case-insensitively).
        status: Check status object; ``status.failed()`` is called for a layer
            without compression or with a compression type that is not allowed.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    # Let gdal raise exceptions instead of returning error codes.
    gdal.UseExceptions()

    # Allowed names are compared in lowercase.
    allowed_compression_types = []
    for type_name in params["compression"]:
        allowed_compression_types.append(type_name.lower())

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))

        # The compression type is read from the IMAGE_STRUCTURE metadata domain.
        image_structure = dataset.GetMetadata("IMAGE_STRUCTURE")
        compression = image_structure.get("COMPRESSION", None)

        if compression is not None:
            if compression.lower() not in allowed_compression_types:
                status.failed("The raster compression type '{:s}' of layer {:s} is not allowed."
                              .format(compression, layer_def["src_layer_name"]))
        else:
            status.failed("Layer {:s} does not have raster data compression set.".format(layer_def["src_layer_name"]))
def run_check(params, status):
    """Check that the upper-left corner of each raster layer is aligned.

    Two conditions are tested on the upper-left coordinates taken from the
    geotransform: they must be divisible by the pixel size, and they must be
    divisible by the grid size.

    Args:
        params: Check parameters; the optional ``params["grid_size"]`` gives
            the grid cell size.
        status: Check status object; ``status.failed()`` is called for every
            violated condition.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    # Falls back to 1000 when the parameter is not given.
    grid_size = params.get("grid_size", 1000)

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # upper-left coordinate divided by pixel-size must leave no remainder
        gt = ds.GetGeoTransform()
        ulx = gt[0]
        uly = gt[3]
        pixelsizex = gt[1]
        pixelsizey = gt[5]

        # NOTE(review): gt[5] is presumably negative for north-up rasters; in that case
        # the comparison with pixelsizey is always true - confirm whether abs() is intended.
        # NOTE(review): the nesting of the two checks below this guard was reconstructed;
        # confirm that the grid check is meant to run only when grid_size is given.
        if "grid_size" in params and params["grid_size"] >= pixelsizex and params["grid_size"] >= pixelsizey:
            if ulx % pixelsizex != 0 or uly % pixelsizey != 0:
                status.failed("The upper-left X, Y coordinates of layer {:s} are not divisible by pixel-size with no remainder."
                              .format(layer_def["src_layer_name"]))

            # Layers must fit to the specified grid
            if ulx % grid_size != 0 or uly % grid_size != 0:
                status.failed("The raster origin of layer {:s} does not align to the LEAC {} metre grid."
                              .format(layer_def["src_layer_name"], grid_size))
def run_check(params, status):
    """Check that the raster attribute table (.vat.dbf) has all required attributes.

    Args:
        params: Check parameters; ``params["attribute_regexes"]`` lists regular
            expressions, each of which must match at least one attribute name
            (case-insensitively, anchored at the end with ``$``).
        status: Check status object; ``status.failed()`` is called when the
            .vat.dbf file is missing or when some regex matches no attribute.
    """
    import osgeo.ogr as ogr

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        src_filepath = layer_def["src_filepath"]

        # The attribute table is expected next to the raster as <raster name>.vat.dbf.
        dbf_filepath = src_filepath.with_name("{:s}.vat.dbf".format(src_filepath.name))
        if not dbf_filepath.is_file():
            status.failed(
                "Attribute table file (.vat.dbf) for layer {:s} is missing.".
                format(layer_def["src_filepath"].name))
            continue

        # Keep a reference to the data source while its layer is in use.
        dbf_ds = ogr.Open(str(dbf_filepath))
        dbf_layer = dbf_ds.GetLayer()
        field_names = [field_defn.name for field_defn in dbf_layer.schema]

        # Collect the regexes which do not match any attribute name.
        unmatched_regexes = [
            attr_regex
            for attr_regex in params["attribute_regexes"]
            if not any(re.match("{:s}$".format(attr_regex), field_name, re.IGNORECASE) is not None
                       for field_name in field_names)
        ]

        if len(unmatched_regexes) > 0:
            status.failed(
                "Raster attribute table {:s} has missing attributes: {:s}.".
                format(dbf_filepath.name, ", ".join(unmatched_regexes)))
def run_check(params, status):
    """Check that band 1 of each raster layer has the expected data type.

    Args:
        params: Check parameters; ``params["datatype"]`` is the expected GDAL
            data type name, compared case-insensitively.
        status: Check status object; ``status.failed()`` is called for every
            layer whose data type name differs.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    expected_datatype = params["datatype"]
    expected_lowercase = str(expected_datatype).lower()

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))

        # Name of the data type of the first band, e.g. "Byte" for 8-bit depth.
        first_band = dataset.GetRasterBand(1)
        actual_datatype = gdal.GetDataTypeName(first_band.DataType)

        # Nothing to report when the names agree.
        if str(actual_datatype).lower() == expected_lowercase:
            continue

        status.failed("Layer {:s}: The raster data type '{:s}' does not match the expected data type '{:s}'."
                      .format(layer_def["src_layer_name"], actual_datatype, expected_datatype))
def run_check(params, status):
    """Check that the block size of band 1 does not exceed the allowed maximum.

    Args:
        params: Check parameters; ``params["max_blocksize"]`` is the maximum
            allowed block width and block height.
        status: Check status object; ``status.aborted()`` is called for every
            layer with a block wider or higher than the maximum.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))

        # Block size of the first band, as [width, height].
        first_band = dataset.GetRasterBand(1)
        block_dims = first_band.GetBlockSize()
        block_width = block_dims[0]
        block_height = block_dims[1]

        limit = params["max_blocksize"]
        if not (block_width > limit or block_height > limit):
            continue

        message = ("Layer {:s} has block size [{:d}, {:d}]. "
                   "Maximum allowed block height or width is {:d}.")
        status.aborted(message.format(layer_def["src_layer_name"],
                                      block_width,
                                      block_height,
                                      params["max_blocksize"]))
def run_check(params, status):
    """Check that each raster layer uses the expected EPSG coordinate system.

    Args:
        params: Check parameters; ``params["epsg"]`` is the expected EPSG code.
            When the optional ``params["auto_identify_epsg"]`` is true, a layer
            without an EPSG authority code is accepted if its SRS is the same
            as the SRS imported from the expected EPSG code.
        status: Check status object. ``status.failed()`` reports a missing or
            non-projected SRS, ``status.aborted()`` reports EPSG problems, and
            ``status.add_params()`` publishes ``detected_epsg``.
    """
    import osgeo.gdal as gdal
    import osgeo.osr as osr

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))
        srs = osr.SpatialReference(dataset.GetProjection())
        if srs is None or srs.IsProjected() == 0:
            status.failed("The raster {:s} has SRS missing.".format(layer_def["src_layer_name"]))
            continue

        # Look up the EPSG authority code of the SRS.
        srs.AutoIdentifyEPSG()
        authority_name = srs.GetAuthorityName(None)
        authority_code = srs.GetAuthorityCode(None)

        # Case 1: the SRS carries an EPSG code, compare it to the expected one.
        if authority_name == "EPSG" and authority_code is not None:
            try:
                epsg_code = int(authority_code)
            except ValueError:
                status.aborted("The raster {:s} has non integer epsg code {:s}".format(layer_def["src_layer_name"], authority_code))
                continue
            if epsg_code != params["epsg"]:
                status.aborted("The raster {:s} has illegal EPSG code {:d}."
                               .format(layer_def["src_layer_name"], epsg_code))
            continue

        # Case 2: no EPSG code and the less-strict detection is switched off.
        if not params.get("auto_identify_epsg", False):
            status.aborted("The raster {:s} has epsg code missing, srs: {:s}."
                           .format(layer_def["src_layer_name"], srs.ExportToWkt()))
            continue

        # Case 3: no EPSG code, less-strict checking of .prj files is requested.
        # The SRS of the layer is compared with the SRS built from the expected code.
        expected_srs = osr.SpatialReference()
        expected_srs.ImportFromEPSG(params["epsg"])
        if not srs.IsSame(expected_srs):
            status.aborted("The raster {:s} does not have an epsg code and the epsg code can not be detected, srs: {:s}."
                           .format(layer_def["src_layer_name"], srs.ExportToWkt()))
        else:
            # The auto-detected epsg is made available for other checks.
            status.add_params({"detected_epsg": params["epsg"]})
def run_check(params, status):
    """Check that band 1 of each raster layer has the expected NoData value.

    Args:
        params: Check parameters; ``params["nodata_value"]`` is the expected
            NoData value.
        status: Check status object; ``status.aborted()`` is called when the
            NoData value is not set or differs from the expected one.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))

        # NoData value of the first band; None when it is not set.
        first_band = dataset.GetRasterBand(1)
        nodata = first_band.GetNoDataValue()

        if nodata is None:
            message = "Layer {:s} does not have a NoData value set.".format(
                layer_def["src_layer_name"])
        elif nodata != params["nodata_value"]:
            message = "Layer {:s} has invalid NoData value: {}. The expected NoData value is {}.".format(
                layer_def["src_layer_name"], nodata, params["nodata_value"])
        else:
            # The NoData value is as expected.
            continue

        status.aborted(message)
def run_check(params, status):
    """Validate the INSPIRE metadata file delivered with each raster layer.

    For every layer the metadata folder is located inside
    ``params["unzip_dir"]``, the .xml file of the layer is located in that
    folder and the file is passed to ``do_inspire_check``.

    Args:
        params: Check parameters; ``unzip_dir``, ``step_nr`` and ``output_dir``
            are read.
        status: Check status object; ``status.info()`` is called and the check
            returns when the metadata folder or the metadata file can not be
            determined.
    """
    from qc_tool.vector.helper import do_inspire_check
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        # Locate a 'metadata' subdirectory inside the delivery.
        # Metadata directory name is case-insensitive, 'Metadata' and 'metadata' are both allowed.
        metadata_dirs = [d for d in params["unzip_dir"].glob('**/*')
                         if d.is_dir() and str(d).lower().endswith(METADATA_DIRNAME)]

        if len(metadata_dirs) == 0:
            status.info(
                "The delivery does not contain the expected '{:s}' folder".
                format(METADATA_DIRNAME))
            return
        if len(metadata_dirs) > 1:
            # Both sentences form one message; the directory name is filled into both placeholders.
            status.info(
                "Multiple folders named '{0:s}' were found in the delivery. "
                "Only one '{0:s}' folder is allowed.".format(METADATA_DIRNAME))
            return
        metadata_dir = metadata_dirs[0]

        # If the Metadata directory exists, try to locate the .xml file inside it.
        xml_filepath = locate_xml_file(metadata_dir, layer_def["src_filepath"])
        if xml_filepath is None:
            status.info(
                "The delivery does not contain the expected metadata file '{:s}/{:s}.xml'"
                .format(metadata_dir.stem, layer_def["src_filepath"].stem))
            return

        # Validate the xml file using INSPIRE validator service
        export_prefix = "s{:02d}_{:s}_inspire".format(
            params["step_nr"], layer_def["src_filepath"].stem)
        do_inspire_check(xml_filepath, export_prefix, params["output_dir"], status)
def run_check(params, status):
    """Check that pixels are square and have the expected size.

    Args:
        params: Check parameters; ``params["pixelsize"]`` is the allowed
            pixel size.
        status: Check status object; ``status.failed()`` is called when a pixel
            is not square (the check then returns immediately) or when the
            pixel size differs from the allowed one.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        dataset = gdal.Open(str(layer_def["src_filepath"]))

        # Pixel width and height are taken from the geotransform as absolute values.
        geotransform = dataset.GetGeoTransform()
        x_size, y_size = abs(geotransform[1]), abs(geotransform[5])

        # A non-square pixel stops the whole check.
        if x_size != y_size:
            status.failed("The pixel is not square-shaped.")
            return

        if x_size == params["pixelsize"]:
            continue

        message = "Layer {:s} has raster pixel size {:f} m, {:f} m is allowed."
        status.failed(message.format(layer_def["src_layer_name"], x_size, params["pixelsize"]))
def run_check(params, status):
    """Find gap pixels of each raster layer inside the area of interest mask.

    The layer is compared tile by tile with a boundary mask. A mask pixel
    with value 1 which has the gap value in the layer (or which lies outside
    of the layer) is counted as a gap. Tiles with gaps are written as
    warning rasters, merged and attached to the status.

    Args:
        params: Check parameters. The keys ``aoi_code``, ``outside_area_code``,
            ``boundary_dir``, ``output_dir``, ``tmp_dir`` and ``step_nr`` are
            read; ``du_column_name``, ``mask_align_grid`` and ``mask`` are
            optional. ``outside_area_code`` may be the string "NODATA", in
            which case the NoData value of the checked layer is used.
        status: Check status object; ``status.info()`` reports cancelled
            checks and found gaps, ``status.add_attachment()`` registers the
            merged warning raster.
    """
    import subprocess

    import numpy
    import osgeo.gdal as gdal
    import osgeo.osr as osr

    from qc_tool.raster.helper import do_raster_layers
    from qc_tool.raster.helper import find_tiles
    from qc_tool.raster.helper import rasterize_mask
    from qc_tool.raster.helper import read_tile
    from qc_tool.raster.helper import write_progress
    from qc_tool.raster.helper import write_percent

    aoi_code = params["aoi_code"].lower()
    gap_value_param = params["outside_area_code"]
    du_column_name = params.get("du_column_name", None)
    mask_align_grid = params.get("mask_align_grid", MASK_ALIGN_GRID)

    # Find the external boundary raster mask layer.
    raster_boundary_dir = params["boundary_dir"].joinpath("raster")
    vector_boundary_dir = params["boundary_dir"].joinpath("vector")
    mask_ident = "default"
    if "mask" in params:
        mask_ident = params["mask"]
    mask_ident = mask_ident.lower()
    # A mask identifier with vector file extension means the mask has to be rasterized first.
    mask_is_vector = mask_ident.endswith((".gpkg", ".shp"))

    for layer_def in do_raster_layers(params):

        # set this to true for writing partial progress to a text file.
        report_progress = True
        progress_filename = "{:s}_{:s}_progress.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        progress_filepath = params["output_dir"].joinpath(progress_filename)
        percent_filename = "{:s}_{:s}_percent.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        percent_filepath = params["output_dir"].joinpath(percent_filename)

        # get raster corners and resolution
        ds = gdal.Open(str(layer_def["src_filepath"]))
        ds_gt = ds.GetGeoTransform()
        ds_ulx = ds_gt[0]
        ds_xres = ds_gt[1]
        ds_uly = ds_gt[3]
        ds_yres = ds_gt[5]
        ds_lrx = ds_ulx + (ds.RasterXSize * ds_xres)
        ds_lry = ds_uly + (ds.RasterYSize * ds_yres)

        # Check availability of mask.
        # The mask is either retrieved from boundary package or it is generated using mask_ident, du_column_name and aoi_code.
        if mask_is_vector:
            mask_vector_filepath = vector_boundary_dir.joinpath(mask_ident)
            if not mask_vector_filepath.exists():
                status.info(
                    "Check cancelled due to boundary vector file {:s} not available."
                    .format(mask_ident))
                return
            mask_file = rasterize_mask(mask_vector_filepath, int(ds_xres),
                                       params["du_column_name"], aoi_code,
                                       mask_align_grid, ds_ulx, ds_uly,
                                       params["output_dir"])
        else:
            mask_file = raster_boundary_dir.joinpath(
                "mask_{:s}_{:03d}m_{:s}.tif".format(mask_ident, int(ds_xres), aoi_code))

        if not mask_file.exists():
            status.info(
                "Check cancelled due to boundary mask file {:s} not available."
                .format(mask_file.name))
            return
        mask_ds = gdal.Open(str(mask_file))
        if mask_ds is None:
            status.info(
                "Check cancelled due to boundary mask file {:s} not available."
                .format(mask_file.name))
            return
        mask_band = mask_ds.GetRasterBand(1)
        nodata_value_mask = mask_band.GetNoDataValue()

        # get aoi mask corners and resolution
        mask_gt = mask_ds.GetGeoTransform()
        mask_ulx = mask_gt[0]
        mask_xres = mask_gt[1]
        mask_uly = mask_gt[3]
        mask_yres = mask_gt[5]
        # The lower-right corner of the mask is derived from the origin of the mask itself.
        mask_lrx = mask_ulx + (mask_ds.RasterXSize * mask_xres)
        mask_lry = mask_uly + (mask_ds.RasterYSize * mask_yres)

        # Check if the dataset extent intersects the mask extent.
        if (mask_ulx > ds_lrx or mask_uly < ds_lry or mask_lrx < ds_ulx
                or mask_lry > ds_uly):
            if mask_is_vector:
                extent_message = "Layer {:s} does not intersect any AOI polygon with {:s}={:s} from boundary {:s}."
                extent_message = extent_message.format(
                    layer_def["src_filepath"].name, du_column_name, aoi_code,
                    mask_ident)
            else:
                extent_message = "Layer {:s} does not intersect the AOI mask {:s}."
                extent_message = extent_message.format(
                    layer_def["src_layer_name"], mask_ident)
            extent_message += "Raster extent: [{:f} {:f}, {:f} {:f}]".format(
                ds_ulx, ds_uly, ds_lrx, ds_lry)
            extent_message += "AOI extent: [{:f} {:f}, {:f} {:f}]".format(
                mask_ulx, mask_uly, mask_lrx, mask_lry)
            status.info(extent_message)
            continue

        # Check if the raster and the AOI mask have the same resolution.
        if ds_xres != mask_xres or ds_yres != mask_yres:
            status.info(
                "Resolution of the raster [{:f}, {:f}] does not match "
                "the resolution [{:f}, {:f}] of the boundary mask {:s}.tif.".
                format(ds_xres, ds_yres, mask_xres, mask_yres, mask_ident))
            continue

        # Check if coordinates of the raster origin are whole integers.
        if not ds_ulx.is_integer() or not ds_uly.is_integer():
            status.info(
                "Coordinates of the raster origin ({:f}, {:f}) are not whole integers."
                .format(ds_ulx, ds_uly))
            continue

        # Check if origin of mask is aligned with origin of raster.
        # The absolute resolution is used as divisor: the remainder of a division
        # by a negative number is never positive, so a negative y resolution
        # would let every misalignment pass.
        if abs(ds_ulx - mask_ulx) % abs(ds_xres) > 0:
            status.info(
                "X coordinates of the raster are not exactly aligned with x coordinates of boundary mask."
                "Raster origin: {:f}, Mask origin: {:f}".format(
                    ds_ulx, mask_ulx))
            continue

        if abs(ds_uly - mask_uly) % abs(ds_yres) > 0:
            status.info(
                "Y coordinates of the raster are not exactly aligned with Y coordinates of boundary mask."
                "Raster origin: {:f}, Mask origin: {:f}".format(
                    ds_uly, mask_uly))
            continue

        if report_progress:
            msg = "ds_ulx: {:f} mask_ulx: {:f}".format(ds_ulx, mask_ulx)
            msg += "\nds_uly: {:f} mask_uly: {:f}".format(ds_uly, mask_uly)
            msg += "\nRasterXSize: {:d} RasterYSize: {:d}".format(
                ds.RasterXSize, ds.RasterYSize)
            write_progress(progress_filepath, msg)

        # Find the tiles
        tiles = find_tiles(ds, mask_ds)
        if report_progress:
            write_progress(progress_filepath,
                           "Number of tiles: {:d}".format(len(tiles)))

        # processing all the tiles:
        ds_band = ds.GetRasterBand(1)

        # retrieval of NoData value:
        # The gap value is resolved separately for every layer, the parameter itself is left
        # untouched so that "NODATA" is still recognized for the following layers.
        if gap_value_param == "NODATA":
            gap_value_ds = ds_band.GetNoDataValue()
        else:
            gap_value_ds = gap_value_param

        gap_count_total = 0
        num_tiles = len(tiles)
        gap_filepaths = []
        for tile_no, tile in enumerate(tiles):

            write_progress(
                progress_filepath,
                "Processing tile {}/{} ({}).".format(tile_no + 1, len(tiles),
                                                     tile.position))

            # reading the mask data into Numpy array
            mask_xoff = tile.x_offset
            mask_yoff = tile.y_offset
            blocksize_x = tile.ncols
            blocksize_y = tile.nrows
            arr_mask = mask_band.ReadAsArray(mask_xoff, mask_yoff,
                                             blocksize_x, blocksize_y)

            # If mask has all values unmapped then mask / raster comparison can be skipped.
            if numpy.max(arr_mask) == 0 or numpy.min(arr_mask) == nodata_value_mask:
                write_progress(
                    progress_filepath,
                    "Tile {} has all values outside of mask, skipping.".format(
                        tile_no + 1))
                continue

            if tile.position == "outside":
                # Current tile is completely outside the bounds of the checked raster.
                # Every mask pixel of such tile is a gap.
                write_progress(progress_filepath, "tile_position: outside.")
                arr_gaps = (arr_mask == 1)
            else:
                # Current tile is completely or partially inside the bounds of the checked raster.
                arr_ds = read_tile(ds, tile, gap_value_ds)
                arr_gaps = ((arr_mask == 1) * (arr_ds == gap_value_ds))

            # find unmapped pixels inside mask
            gap_count = int(numpy.sum(arr_gaps))
            if gap_count > 0:
                # For each mask tile with gaps, create a new warning raster dataset.
                # These datasets can be merged or polygonized at the end of the run.
                src_stem = layer_def["src_filepath"].stem
                gap_ds_filename = "s{:02d}_{:s}_gap_warning_{:d}.tif".format(
                    params["step_nr"], src_stem, tile_no)
                gap_ds_filepath = params["tmp_dir"].joinpath(gap_ds_filename)
                driver = gdal.GetDriverByName('GTiff')
                gap_ds = driver.Create(str(gap_ds_filepath), blocksize_x,
                                       blocksize_y, 1, gdal.GDT_Byte,
                                       ['COMPRESS=LZW'])
                gap_ds.SetGeoTransform(
                    [tile.xmin, mask_xres, 0, tile.ymax, 0, mask_yres])
                gap_sr = osr.SpatialReference()
                gap_sr.ImportFromWkt(ds.GetProjectionRef())
                gap_ds.SetProjection(gap_sr.ExportToWkt())
                gap_band = gap_ds.GetRasterBand(1)
                gap_band.SetNoDataValue(0)
                gap_band.WriteArray(arr_gaps.astype("byte"), 0, 0)
                gap_ds.FlushCache()
                # Dropping the references closes the warning raster.
                gap_band = None
                gap_ds = None
                gap_filepaths.append(str(gap_ds_filepath))

            gap_count_total += gap_count

            if report_progress:
                msg = "tile: {:d}/{:d} ({:s}), gaps: {:d}".format(
                    tile_no + 1, num_tiles, tile.position, gap_count)
                write_progress(progress_filepath, msg)
                progress_percent = int(100 * (tile_no / num_tiles))
                write_percent(percent_filepath, progress_percent)

        # Free memory for checked raster and for mask.
        ds_band = None
        mask_band = None
        ds = None
        mask_ds = None

        # Generate attachments.
        if gap_count_total > 0:
            # Merge previously generated tile gap rasters into a .vrt
            src_stem = layer_def["src_filepath"].stem
            warning_vrt_filename = "s{:02d}_{:s}_gap_warning.vrt".format(
                params["step_nr"], src_stem)
            warning_vrt_filepath = params["tmp_dir"].joinpath(
                warning_vrt_filename)
            if len(gap_filepaths) > 0:
                cmd = ["gdalbuildvrt", str(warning_vrt_filepath)]
                cmd = cmd + gap_filepaths
                write_progress(progress_filepath, " ".join(cmd))
                subprocess.run(cmd)
            status.info(
                "Layer {:s} has {:d} gap pixels in the mapped area.".
                format(layer_def["src_layer_name"], gap_count_total))

            # Convert the .vrt to a GeoTiff
            if warning_vrt_filepath.is_file():
                warning_tif_filename = "s{:02d}_{:s}_gap_warning.tif".format(
                    params["step_nr"], src_stem)
                warning_tif_filepath = params["output_dir"].joinpath(
                    warning_tif_filename)
                cmd = [
                    "gdal_translate", "-of", "GTiff", "-ot", "Byte", "-co",
                    "TILED=YES", "-co", "COMPRESS=LZW",
                    str(warning_vrt_filepath),
                    str(warning_tif_filepath)
                ]
                subprocess.run(cmd)
                status.add_attachment(warning_tif_filepath.name)
def _compare_color_tables(expected_colors, actual_colors):
    """Return codes missing in actual_colors and reports of differing colors.

    Both arguments are dictionaries keyed by raster value. The result is a
    tuple ``(missing_codes, color_reports)``: the expected codes which have no
    entry in actual_colors, and one report string for every expected code
    whose actual color differs.
    """
    missing_codes = []
    color_reports = []
    for code, expected_color in expected_colors.items():
        if code not in actual_colors:
            missing_codes.append(code)
        elif expected_color != actual_colors[code]:
            color_reports.append(
                "value:{0}, expected RGB:{1}, actual RGB:{2}".format(
                    code, expected_color, actual_colors[code]))
    return missing_codes, color_reports


def run_check(params, status):
    """Check the embedded color table and the .clr color table file of each layer.

    Args:
        params: Check parameters; ``params["colors"]`` maps raster values to
            the expected colors.
        status: Check status object; ``status.info()`` is called when the bit
            depth does not allow a color table, ``status.failed()`` for a
            missing or incorrect color table.
    """
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    expected_colors = params["colors"]

    for layer_def in do_raster_layers(params):
        src_filepath = layer_def["src_filepath"]
        geotiff_name = src_filepath.name
        ds = gdal.Open(str(src_filepath))
        band = ds.GetRasterBand(1)
        bit_depth = str(gdal.GetDataTypeName(band.DataType)).lower()

        # Colour tables can only be checked for specific bit depths.
        if str(bit_depth) not in BIT_DEPTHS_WITH_COLORTABLE:
            status.info(
                "The raster {:s} is in {:s} bit depth and thus it is not possible to check for colour table"
                .format(layer_def["src_layer_name"], bit_depth))
            return

        # check the color table of the band
        ct = band.GetRasterColorTable()
        if ct is None:
            status.failed(
                "The raster {:s} has embedded color table missing.".format(
                    layer_def["src_layer_name"]))
            return

        # read-in the actual color table into a dictionary
        actual_colors = {}
        for i in range(ct.GetCount()):
            entry = ct.GetColorEntry(i)
            if not entry:
                continue
            # converting a GDAL ColorEntry (r,g,b,a) tuple to a [r,g,b] list
            actual_colors[str(i)] = list(entry[0:3])

        # compare expected color table with the actual color table
        missing_codes, color_reports = _compare_color_tables(expected_colors, actual_colors)

        # report raster values with missing entries in the color table
        if len(missing_codes) > 0:
            status.failed(
                "The raster color table embedded in {:s} does not have entries for raster values {:s}."
                .format(geotiff_name, ", ".join(missing_codes)))
            continue

        # report color mismatches between expected and actual color table
        if len(color_reports) > 0:
            status.failed(
                "The raster color table has some incorrect colors. {:s}".
                format("; ".join(color_reports)))
            continue

        # Check existence of a .tif.clr or .clr file.
        # Only the suffix of the file name is handled, the directory part of the path is left as is.
        # NOTE(review): assumes src_filepath is a pathlib.Path - confirm against do_raster_layers.
        clr_filepath1 = src_filepath.with_suffix(".clr")
        clr_filepath2 = src_filepath.with_name(src_filepath.name + ".clr")
        if clr_filepath1.is_file():
            clr_filepath = clr_filepath1
        elif clr_filepath2.is_file():
            clr_filepath = clr_filepath2
        else:
            status.failed(
                "The expected color table text file {:s} or {:s} is missing.".
                format(clr_filepath1.name, clr_filepath2.name))
            continue
        clr_filename = clr_filepath.name

        # read-in the actual tif.clr color table into a dictionary
        try:
            actual_colors = parse_clr_file(clr_filepath)
        except ValueError:
            status.failed(
                "The color table text file {:s} is in incorrect format.".
                format(clr_filename))
            continue

        # Check colors in .tif.clr file
        missing_codes, color_reports = _compare_color_tables(expected_colors, actual_colors)

        # report raster values with missing entries in the color table
        if len(missing_codes) > 0:
            status.failed(
                "The raster color table text file {:s} does not have entries for raster values {:s}."
                .format(clr_filename, ", ".join(missing_codes)))

        # report color mismatches between expected and actual color table
        if len(color_reports) > 0:
            status.failed(
                "The raster color text file {:s} has some incorrect colors. {:s}"
                .format(clr_filename, "; ".join(color_reports)))
def run_check(params, status):
    """Find patches of each raster layer which are smaller than the minimum mapping unit.

    The raster is read in tiles. Patches are labelled with 4-connectivity
    inside the inner part of every tile; patches smaller than the MMU which
    touch the edge of the inner part are labelled once more in the tile
    extended by a buffer. Patches under the MMU are exported to geopackages
    as errors or, for the exceptional cases, as exceptions.

    Args:
        params: Check parameters. ``area_pixels`` (the MMU in pixels),
            ``output_dir`` and ``step_nr`` are read; ``groupcodes``,
            ``nodata_value``, ``report_exceptions``, ``value_exception_codes``
            and ``neighbour_exception_codes`` are optional.
        status: Check status object; ``status.failed()`` reports error
            patches, ``status.info()`` reports exceptional patches and
            ``status.add_attachment()`` registers the exported geopackages.
    """
    import osgeo.gdal as gdal
    import skimage.measure as measure

    from qc_tool.raster.helper import do_raster_layers

    # set this to true for reporting partial progress to a _progress.txt file.
    report_progress = True

    # Determine if a "reclassify" step is required based on the "groupcodes" parameter setting.
    if "groupcodes" in params and params["groupcodes"] is not None and len(params["groupcodes"]) > 0:
        use_reclassify = True
    else:
        use_reclassify = False

    # NoData value can optionally be set as parameter
    # cell values with NODATA are excluded from MMU analysis.
    # FIXME use a default that is outside of the range of possible raster values.
    NODATA = params.get("nodata_value", -1)

    # The optional report_exceptions parameter indicates whether any exceptions should be reported.
    report_exceptions = params.get("report_exceptions", True)

    # size of a raster tile. Should be a multiple of 256 because GeoTiff stores its data in 256*256 pixel blocks.
    BLOCKSIZE = 2048
    MMU = params["area_pixels"]

    # Some classes can optionally be excluded from MMU requirements.
    # Pixels belonging to these classes are reported as exceptions.
    exclude_values = params.get("value_exception_codes", [])

    # neighbouring values to exclude.
    # patches with area < MMU which touch a patch having class in neighbour_exclude_values are reported
    # as exceptions.
    neighbour_exclude_values = params.get("neighbour_exception_codes", [])

    for layer_def in do_raster_layers(params):
        progress_filename = "{:s}_{:s}_progress.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        progress_filepath = params["output_dir"].joinpath(progress_filename)
        percent_filename = "{:s}_{:s}_percent.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        percent_filepath = params["output_dir"].joinpath(percent_filename)

        # The checked raster is not read into memory as a whole.
        # Instead, ReadAsArray is used to read subsets of the raster (tiles) on demand.
        ds = gdal.Open(str(layer_def["src_filepath"]))
        nRasterCols = ds.RasterXSize
        nRasterRows = ds.RasterYSize

        # tile buffer width. Must be bigger then number of pixels in MMU.
        buffer_width = MMU + 1

        # special handling of very small rasters - no need to split in multiple tiles
        if buffer_width >= nRasterCols:
            buffer_width = 0
        if nRasterCols <= BLOCKSIZE or nRasterRows <= BLOCKSIZE:
            buffer_width = 0

        blocksize_with_buffer = BLOCKSIZE + 2 * buffer_width

        # Detected patches with area<MMU will be stored in this list.
        regions_lessMMU = []

        # Exception patches with area<MMU and belonging to exclude class.
        regions_lessMMU_except = []

        nTileCols = int(math.ceil(nRasterCols / BLOCKSIZE))
        nTileRows = int(math.ceil(nRasterRows / BLOCKSIZE))
        last_col = nTileCols - 1
        last_row = nTileRows - 1

        if report_progress:
            msg = "processing {:d} tiles: {:d} rows, {:d} columns".format(
                nTileRows * nTileCols, nTileRows, nTileCols)
            write_progress(progress_filepath, msg)

        # TILES: ITERATE ROWS
        for tileRow in range(nTileRows):

            if report_progress:
                progress_percent = int(100 * ((tileRow + 1) / nTileRows))
                write_percent(percent_filepath, progress_percent)

            if tileRow == 0:
                # First row
                yOff = 0
                yOffInner = 0
                yOffRelative = 0
                block_height = BLOCKSIZE + buffer_width
                block_height_inner = BLOCKSIZE
            else:
                # Middle row
                yOff = tileRow * BLOCKSIZE
                yOffInner = yOff + buffer_width
                yOffRelative = buffer_width
                block_height = blocksize_with_buffer
                block_height_inner = BLOCKSIZE

            if tileRow == last_row:
                # Last row is a special case - block height must be adjusted.
                block_height = nRasterRows - yOff
                block_height_inner = block_height - buffer_width

            # if row tile only contains buffer zone and no inner zone, skip..
            if block_height <= buffer_width:
                continue

            # if we reached the maximum number of patches < MMU, then report message and exit.
            if len(regions_lessMMU) > MAX_REPORTED_REGION_COUNT:
                break

            # TILES: ITERATE COLUMNS
            for tileCol in range(nTileCols):
                if tileCol == 0:
                    # First column
                    xOff = 0
                    xOffInner = 0
                    xOffRelative = 0
                    block_width = BLOCKSIZE + buffer_width
                    block_width_inner = BLOCKSIZE
                else:
                    # Middle column
                    xOff = tileCol * BLOCKSIZE
                    xOffInner = xOff + buffer_width
                    xOffRelative = buffer_width
                    block_width = blocksize_with_buffer
                    block_width_inner = BLOCKSIZE

                if tileCol == last_col:
                    # Last column is a special case - block width must be adjusted.
                    block_width = nRasterCols - xOff
                    block_width_inner = block_width - buffer_width

                # if column tile only contains buffer zone and no inner zone, skip..
                if block_width <= buffer_width:
                    continue

                # if we reached the maximum number of patches < MMU, then report message and exit.
                if len(regions_lessMMU) > MAX_REPORTED_REGION_COUNT:
                    break

                # read whole array (with buffers)
                tile_buffered = ds.ReadAsArray(xOff, yOff, block_width, block_height)

                # special case: if the tile has all values equal then skip MMU checks.
                if tile_buffered.min() == tile_buffered.max():
                    if report_progress:
                        msg_tile = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h} all values same."
                        msg_tile = msg_tile.format(tr=tileRow, ntr=nTileRows, tc=tileCol,
                                                   w=block_width_inner, h=block_height_inner)
                        write_progress(progress_filepath, msg_tile)
                    continue

                # reclassify tile array if some patches should be grouped together
                if use_reclassify:
                    tile_buffered = reclassify_values(tile_buffered, params["groupcodes"])

                # inner tile is subset of buffered tile (without buffer)
                tile_inner = tile_buffered[yOffRelative:yOffRelative + block_height_inner,
                                           xOffRelative:xOffRelative + block_width_inner]

                # label the inner array and find patches < MMU
                labels_inner = measure.label(tile_inner, background=NODATA, connectivity=1)
                regions_inner = measure.regionprops(labels_inner)
                regions_inner_lessMMU = [r for r in regions_inner if r.area < MMU]

                # Split lessMMU patches of inner array to those touching its edge and those inside.
                # bbox is (min_row, min_col, max_row, max_col), so the row bound is compared
                # with the number of rows and the column bound with the number of columns.
                inner_nrows, inner_ncols = tile_inner.shape
                regions_lessMMU_edge = [
                    r for r in regions_inner_lessMMU
                    if r.bbox[0] == 0 or r.bbox[1] == 0
                    or r.bbox[2] == inner_nrows or r.bbox[3] == inner_ncols
                ]
                labels_lessMMU_edge = [r.label for r in regions_lessMMU_edge]
                regions_lessMMU_inside = [
                    r for r in regions_inner_lessMMU
                    if r.label not in labels_lessMMU_edge
                ]

                # progress reporting..
                if report_progress:
                    msg = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h}"
                    msg = msg.format(tr=tileRow, ntr=nTileRows, tc=tileCol,
                                     w=block_width_inner, h=block_height_inner)
                    if len(regions_lessMMU_inside) > 0:
                        msg += " found {:d} areas < MMU".format(len(regions_lessMMU_inside))
                    write_progress(progress_filepath, msg)

                # inspect inner patches
                for r in regions_lessMMU_inside:
                    # coords are stored as [row, column].
                    first_row = r.coords[0][0]
                    first_col = r.coords[0][1]
                    lessMMU_value = tile_inner[first_row, first_col]

                    # convert relative coords to absolute [source raster column, source raster row].
                    absolute_coords = [[c[1] + xOffInner, c[0] + yOffInner] for c in r.coords]

                    lessMMU_info = {
                        "tileRow": tileRow,
                        "tileCol": tileCol,
                        "area": r.area,
                        "value": lessMMU_value,
                        "coords": absolute_coords
                    }

                    if lessMMU_value in exclude_values:
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    elif patch_touches_cell_with_value(r.coords, tile_inner, neighbour_exclude_values):
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    elif patch_touches_raster_edge(absolute_coords, nRasterRows, nRasterCols):
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    else:
                        regions_lessMMU.append(lessMMU_info)

                # no need to inspect buffered tile if there are no suspect lessMMU patches at edge of inner tile
                if len(regions_lessMMU_edge) == 0:
                    continue
                elif report_progress:
                    msg = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h} INSPECTING EDGE PATCHES"
                    msg = msg.format(tr=tileRow, ntr=nTileRows, tc=tileCol,
                                     w=block_width, h=block_height)
                    write_progress(progress_filepath, msg)

                # processing the outer array expanded by buffer with width=number of pixels in MMU
                # optimization: set pixels not within buffer zone (deep inside outer array) to background.
                inner_buf_startcol = xOffRelative + MMU
                inner_buf_startrow = yOffRelative + MMU
                inner_buf_endcol = xOffRelative + block_width_inner - MMU
                inner_buf_endrow = yOffRelative + block_height_inner - MMU
                if inner_buf_endcol > inner_buf_startcol and inner_buf_endrow > inner_buf_startrow:
                    tile_buffered[inner_buf_startrow:inner_buf_endrow,
                                  inner_buf_startcol:inner_buf_endcol] = NODATA

                labels_buf = measure.label(tile_buffered, background=NODATA, connectivity=1)
                buf_regions = measure.regionprops(labels_buf)
                buf_regions_small = [r for r in buf_regions if r.area < MMU]
                buf_labels_small = [r.label for r in buf_regions_small]

                # edge_regions_small is used for reporting only.
                edge_regions_small = []
                for r in regions_lessMMU_edge:
                    first_row = r.coords[0][0]
                    first_col = r.coords[0][1]
                    val = tile_inner[first_row, first_col]

                    # get corresponding label of tile edge patch in buffered array..
                    # The row is shifted by the y offset and the column by the x offset of the inner tile.
                    lbl_buf = labels_buf[first_row + yOffRelative, first_col + xOffRelative]

                    # if the inner tile edge patch has area < MMU also in the expanded tile, report it.
                    if lbl_buf in buf_labels_small:
                        r_buf = buf_regions[lbl_buf - 1]
                        edge_regions_small.append(r)

                        # coordinates are specified as row, column..
                        # convert [row in tile, column in tile] to [source raster column, source raster row]
                        absolute_coords = [[c[1] + xOff, c[0] + yOff] for c in r_buf.coords]

                        lessMMU_info = {
                            "tileRow": tileRow,
                            "tileCol": tileCol,
                            "area": r_buf.area,
                            "value": val,
                            "coords": absolute_coords
                        }

                        # handling special cases (exception patches)
                        if val in exclude_values:
                            regions_lessMMU_except.append(lessMMU_info)
                        elif patch_touches_cell_with_value(r_buf.coords, tile_buffered, neighbour_exclude_values):
                            regions_lessMMU_except.append(lessMMU_info)
                        elif patch_touches_raster_edge(absolute_coords, nRasterRows, nRasterCols):
                            regions_lessMMU_except.append(lessMMU_info)
                        else:
                            regions_lessMMU.append(lessMMU_info)

                if report_progress and len(edge_regions_small) > 0:
                    # report actual edge regions < MMU after applying buffer
                    msg = "xOff {:d} yOff {:d} xOffInner {:d} yOffInner {:d}".format(
                        xOff, yOff, xOffInner, yOffInner)
                    write_progress(progress_filepath, msg)
                    msg = "BUFFER: tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h}"
                    msg = msg.format(tr=tileRow, ntr=nTileRows, tc=tileCol,
                                     w=block_width, h=block_height)
                    num_edge_patches = len(edge_regions_small)
                    msg += " found {:d} edge areas < MMU in tile {:d}, {:d}".format(
                        num_edge_patches, tileRow, tileCol)
                    write_progress(progress_filepath, msg)

        # Export errors and exceptions to geopackage.
        # The geopackage contains one sample point from each lessMMU patch.

        # lessMMU patches belonging to one of exclude_values classes are reported as exceptions.
        if report_exceptions and len(regions_lessMMU_except) > 0:
            gpkg_filename = "s{:02d}_{:s}_lessmmu_exception.gpkg".format(
                params["step_nr"], layer_def["src_filepath"].stem)
            gpkg_filepath = params["output_dir"].joinpath(gpkg_filename)
            export(regions_lessMMU_except, ds, gpkg_filepath, MAX_REPORTED_REGION_COUNT)
            status.add_attachment(gpkg_filename)
            if len(regions_lessMMU_except) <= MAX_REPORTED_REGION_COUNT:
                status.info(
                    "The data source has {:d} exceptional objects under MMU limit of {:d} pixels."
                    .format(len(regions_lessMMU_except), params["area_pixels"]))
            else:
                status.info(
                    "The data source has more than {:d} exceptional objects under MMU limit of {:d} pixels."
                    .format(MAX_REPORTED_REGION_COUNT, params["area_pixels"]))

        # lessMMU patches not belonging to exclude_values are reported as errors.
        if len(regions_lessMMU) > 0:
            gpkg_filename = "s{:02d}_{:s}_lessmmu_error.gpkg".format(
                params["step_nr"], layer_def["src_filepath"].stem)
            gpkg_filepath = params["output_dir"].joinpath(gpkg_filename)
            export(regions_lessMMU, ds, gpkg_filepath, MAX_REPORTED_REGION_COUNT)
            status.add_attachment(gpkg_filename)
            if len(regions_lessMMU) <= MAX_REPORTED_REGION_COUNT:
                status.failed(
                    "The data source has {:d} error objects under MMU limit of {:d} pixels."
                    .format(len(regions_lessMMU), params["area_pixels"]))
            else:
                status.failed(
                    "The data source has more than {:d} error objects under MMU limit of {:d} pixels."
                    .format(MAX_REPORTED_REGION_COUNT, params["area_pixels"]))