Example No. 1
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    # Extract the validcodes parameter. An item in validcodes can be a single number
    # or a [min, max] range given as a two-element list.
    valid_codes = []
    for validcode_item in params["validcodes"]:
        if type(validcode_item) is list:
            valid_codes = valid_codes + list(range(validcode_item[0], validcode_item[1] + 1))
        else:
            valid_codes.append(validcode_item)


    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # get dictionary of pixel 'codes-counts'
        ds_band = ds.GetRasterBand(1)
        counts = ds_band.GetHistogram(approx_ok=False)
        # With the default histogram arguments there is one bucket per integer value
        # 0-255, so the bucket index corresponds to the pixel value of an 8-bit raster.
        codes = range(len(counts))
        hist = dict(zip(codes, counts))

        # get list of 'used' codes (with non-zero pixel count)
        used_codes = [i for i in hist if hist[i] != 0]

        # check particular codes against given list of valid codes
        invalid_codes = list()
        for code in used_codes:
            if code not in valid_codes:
                invalid_codes.append(str(code))
        if len(invalid_codes) > 0:
            invalid_codes_str = ', '.join(invalid_codes)
            status.failed("Layer {:s} has pixels with invalid values: {:s}."
                          .format(layer_def["src_layer_name"], invalid_codes_str))
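
A minimal, self-contained sketch of the same logic, using an illustrative "validcodes" value and a fake histogram dictionary instead of a real product configuration and raster:

# Illustrative expansion of a hypothetical validcodes parameter; [10, 12] stands for the range 10-12.
validcodes = [1, 2, [10, 12]]
valid_codes = []
for item in validcodes:
    if isinstance(item, list):
        valid_codes.extend(range(item[0], item[1] + 1))
    else:
        valid_codes.append(item)

# Fake 'pixel value -> count' mapping standing in for GetHistogram() output.
hist = {1: 500, 2: 30, 11: 7, 13: 2}
used_codes = [code for code, count in hist.items() if count != 0]
invalid_codes = [code for code in used_codes if code not in valid_codes]
print(valid_codes)    # [1, 2, 10, 11, 12]
print(invalid_codes)  # [13]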
Example No. 2
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    # enable gdal to use exceptions
    gdal.UseExceptions()

    # set compression type names to lowercase
    allowed_compression_types = [c.lower() for c in params["compression"]]

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # get raster metadata
        meta = ds.GetMetadata("IMAGE_STRUCTURE")

        compression = meta.get("COMPRESSION", None)

        if compression is None:
            status.failed("Layer {:s} does not have raster data compression set.".format(layer_def["src_layer_name"]))
            continue

        if compression.lower() not in allowed_compression_types:
            status.failed("The raster compression type '{:s}' of layer {:s} is not allowed."
                          .format(compression, layer_def["src_layer_name"]))
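
For reference, the following sketch (assuming the GDAL Python bindings with the GTiff driver are available) creates a small LZW-compressed GeoTIFF in /vsimem/ and reads back the COMPRESSION field that the check above inspects:

import osgeo.gdal as gdal

driver = gdal.GetDriverByName("GTiff")
ds = driver.Create("/vsimem/compression_demo.tif", 16, 16, 1, gdal.GDT_Byte, ["COMPRESS=LZW"])
ds = None  # flush and close

ds = gdal.Open("/vsimem/compression_demo.tif")
meta = ds.GetMetadata("IMAGE_STRUCTURE")
print(meta.get("COMPRESSION", None))  # LZW
ds = None
gdal.Unlink("/vsimem/compression_demo.tif")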
Example No. 3
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    grid_size = params.get("grid_size", 1000)

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # upper-left coordinate divided by pixel-size must leave no remainder
        gt = ds.GetGeoTransform()
        ulx = gt[0]
        uly = gt[3]
        pixelsizex = gt[1]
        pixelsizey = gt[5]

        if "grid_size" in params and params["grid_size"] >= pixelsizex and params["grid_size"] >= pixelsizey:
            if ulx % pixelsizex != 0 or uly % pixelsizey != 0:
                status.failed("The upper-left X, Y coordinates of layer {:s} are not divisible by pixel-size with no remainder."
                              .format(layer_def["src_layer_name"]))

        # Layers must fit to the specified grid
        if ulx % grid_size != 0 or uly % grid_size != 0:
            status.failed("The raster origin of layer {:s} does not align to the LEAC {} metre grid."
                          .format(layer_def["src_layer_name"], grid_size))
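
The alignment test boils down to modulo arithmetic on the geotransform, whose layout is (ulx, pixel width, x-skew, uly, y-skew, pixel height). A small sketch with hypothetical numbers:

# Hypothetical geotransform as returned by ds.GetGeoTransform().
gt = (4500000.0, 100.0, 0.0, 3200000.0, 0.0, -100.0)
ulx, pixelsizex = gt[0], gt[1]
uly, pixelsizey = gt[3], gt[5]
grid_size = 1000

print(ulx % pixelsizex == 0 and uly % pixelsizey == 0)  # True: origin aligned to the pixel size
print(ulx % grid_size == 0 and uly % grid_size == 0)    # True: origin aligned to the 1000 m grid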
Example No. 4
def run_check(params, status):
    import re
    import osgeo.ogr as ogr
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        # check for .vat.dbf file existence
        dbf_filename = "{:s}.vat.dbf".format(layer_def["src_filepath"].name)
        dbf_filepath = layer_def["src_filepath"].with_name(dbf_filename)

        if not dbf_filepath.is_file():
            status.failed(
                "Attribute table file (.vat.dbf) for layer {:s} is missing.".
                format(layer_def["src_filepath"].name))
            continue

        ds = ogr.Open(str(dbf_filepath))
        layer = ds.GetLayer()
        attr_names = [field_defn.name for field_defn in layer.schema]
        missing_attr_regexes = []
        for attr_regex in params["attribute_regexes"]:
            is_missing = True
            for attr_name in attr_names:
                mobj = re.match("{:s}$".format(attr_regex), attr_name,
                                re.IGNORECASE)
                if mobj is not None:
                    is_missing = False
                    break
            if is_missing:
                missing_attr_regexes.append(attr_regex)
        if len(missing_attr_regexes) > 0:
            missing_attr_message = ", ".join(missing_attr_regexes)
            status.failed(
                "Raster attribute table {:s} has missing attributes: {:s}.".
                format(dbf_filepath.name, missing_attr_message))
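
The attribute test relies on anchored, case-insensitive regular expressions. A self-contained sketch with made-up attribute names and patterns (not taken from a real .vat.dbf):

import re

attr_names = ["VALUE", "Count", "Area_ha"]
attribute_regexes = ["value", "count", "class_name"]

missing_attr_regexes = []
for attr_regex in attribute_regexes:
    # The appended "$" together with re.match forces a full-name match.
    pattern = "{:s}$".format(attr_regex)
    if not any(re.match(pattern, name, re.IGNORECASE) for name in attr_names):
        missing_attr_regexes.append(attr_regex)
print(missing_attr_regexes)  # ['class_name']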
Example No. 5
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    expected_datatype = params["datatype"]

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # Get the DataType of the band ("Byte" means 8-bit depth).
        band = ds.GetRasterBand(1)
        actual_datatype = gdal.GetDataTypeName(band.DataType)

        # Compare actual data type to expected data type.
        if str(actual_datatype).lower() != str(expected_datatype).lower():
            status.failed("Layer {:s}: The raster data type '{:s}' does not match the expected data type '{:s}'."
                          .format(layer_def["src_layer_name"], actual_datatype, expected_datatype))
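
The comparison works on GDAL's data type names. A short sketch (assuming the GDAL Python bindings) showing the mapping used by gdal.GetDataTypeName:

import osgeo.gdal as gdal

print(gdal.GetDataTypeName(gdal.GDT_Byte))     # Byte
print(gdal.GetDataTypeName(gdal.GDT_UInt16))   # UInt16
print(gdal.GetDataTypeName(gdal.GDT_Float32))  # Float32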
Example No. 6
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # Get the internal block (tile) size of the first band.
        ds_band = ds.GetRasterBand(1)
        blocksize = ds_band.GetBlockSize()
        if blocksize[0] > params["max_blocksize"] or blocksize[1] > params[
                "max_blocksize"]:
            status.aborted(
                "Layer {:s} has block size [{:d}, {:d}]. "
                "Maximum allowed block height or width is {:d}.".format(
                    layer_def["src_layer_name"], blocksize[0], blocksize[1],
                    params["max_blocksize"]))
Example No. 7
def run_check(params, status):
    import osgeo.gdal as gdal
    import osgeo.osr as osr

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        srs = osr.SpatialReference(ds.GetProjection())
        if srs is None or srs.IsProjected() == 0:
            status.failed("The raster {:s} has SRS missing.".format(layer_def["src_layer_name"]))
            continue

        # Search EPSG authority code
        srs.AutoIdentifyEPSG()
        authority_name = srs.GetAuthorityName(None)
        authority_code = srs.GetAuthorityCode(None)

        if authority_name == "EPSG" and authority_code is not None:
            try:
                authority_code = int(authority_code)
            except ValueError:
                status.aborted("The raster {:s} has non integer epsg code {:s}".format(layer_def["src_layer_name"], authority_code))
            else:
                if authority_code != params["epsg"]:
                    status.aborted("The raster {:s} has illegal EPSG code {:d}."
                                   .format(layer_def["src_layer_name"], authority_code))
        elif params.get("auto_identify_epsg", False):
            # Parameter auto_identify_epsg can be used for less-strict checking of .prj files.
            # There is a built-in function in GDAL 2.3 with matching logic.
            is_detected = False
            expected_srs = osr.SpatialReference()
            expected_srs.ImportFromEPSG(params["epsg"])
            if srs.IsSame(expected_srs):
                # The auto-detected epsg is made available for other checks.
                status.add_params({"detected_epsg": params["epsg"]})
            else:
                status.aborted("The raster {:s} does not have an epsg code and the epsg code can not be detected, srs: {:s}."
                               .format(layer_def["src_layer_name"], srs.ExportToWkt()))
        else:
            status.aborted("The raster {:s} has epsg code missing, srs: {:s}."
                           .format(layer_def["src_layer_name"], srs.ExportToWkt()))
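
The auto_identify_epsg branch compares the layer's SRS with one built from the expected EPSG code. A minimal sketch (assuming GDAL/osr is available; EPSG:3035 is used purely as an illustration):

import osgeo.osr as osr

expected_srs = osr.SpatialReference()
expected_srs.ImportFromEPSG(3035)

# Pretend this WKT came from a raster's .prj / embedded projection.
srs = osr.SpatialReference(expected_srs.ExportToWkt())
srs.AutoIdentifyEPSG()

print(srs.GetAuthorityName(None), srs.GetAuthorityCode(None))  # EPSG 3035
print(bool(srs.IsSame(expected_srs)))                          # True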
Example No. 8
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # Get the NoData value of the first band.
        ds_band = ds.GetRasterBand(1)
        band_nodata = ds_band.GetNoDataValue()

        if band_nodata is None:
            status.aborted(
                "Layer {:s} does not have a NoData value set.".format(
                    layer_def["src_layer_name"]))
        elif band_nodata != params["nodata_value"]:
            status.aborted(
                "Layer {:s} has invalid NoData value: {}. The expected NoData value is {}."
                .format(layer_def["src_layer_name"], band_nodata,
                        params["nodata_value"]))
Example No. 9
def run_check(params, status):
    from qc_tool.vector.helper import do_inspire_check
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):

        # Locate a 'metadata' subdirectory inside the delivery.
        # Metadata directory name is case-insensitive, 'Metadata' and 'metadata' are both allowed.
        metadata_dirs = [
            d for d in params["unzip_dir"].glob('**/*')
            if d.is_dir() and str(d).lower().endswith(METADATA_DIRNAME)
        ]
        if len(metadata_dirs) == 0:
            status.info(
                "The delivery does not contain the expected '{:s}' folder.".
                format(METADATA_DIRNAME))
            return
        elif len(metadata_dirs) > 1:
            status.info(
                "Multiple folders named '{:s}' were found in the delivery. "
                "Only one '{:s}' folder is allowed.".format(
                    METADATA_DIRNAME, METADATA_DIRNAME))
            return
        else:
            metadata_dir = metadata_dirs[0]

        # If the Metadata directory exists, try to locate the .xml file inside it.
        xml_filepath = locate_xml_file(metadata_dir, layer_def["src_filepath"])
        if xml_filepath is None:
            status.info(
                "The delivery does not contain the expected metadata file '{:s}/{:s}.xml'."
                .format(metadata_dir.stem, layer_def["src_filepath"].stem))
            return

        # Validate the xml file using INSPIRE validator service
        export_prefix = "s{:02d}_{:s}_inspire".format(
            params["step_nr"], layer_def["src_filepath"].stem)
        do_inspire_check(xml_filepath, export_prefix, params["output_dir"],
                         status)
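
The folder lookup is a case-insensitive glob over the unzipped delivery. A throwaway sketch using a temporary directory tree (METADATA_DIRNAME is assumed here to be "metadata"; in the real check it is a module-level constant):

import tempfile
from pathlib import Path

METADATA_DIRNAME = "metadata"  # assumption for this sketch
unzip_dir = Path(tempfile.mkdtemp())
(unzip_dir / "product" / "Metadata").mkdir(parents=True)

metadata_dirs = [d for d in unzip_dir.glob("**/*")
                 if d.is_dir() and str(d).lower().endswith(METADATA_DIRNAME)]
print([d.name for d in metadata_dirs])  # ['Metadata']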
Example No. 10
def run_check(params, status):
    import osgeo.gdal as gdal

    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):
        ds = gdal.Open(str(layer_def["src_filepath"]))

        # get raster pixel size
        gt = ds.GetGeoTransform()
        x_size = abs(gt[1])
        y_size = abs(gt[5])

        # verify the square shape of the pixel
        if x_size != y_size:
            status.failed("The pixel is not square-shaped.")
            return

        if x_size != params["pixelsize"]:
            status.failed(
                "Layer {:s} has raster pixel size {:f} m while {:f} m is expected.".
                format(layer_def["src_layer_name"], x_size,
                       params["pixelsize"]))
Example No. 11
def run_check(params, status):
    import subprocess
    import numpy
    import osgeo.gdal as gdal
    import osgeo.osr as osr

    from qc_tool.raster.helper import do_raster_layers
    from qc_tool.raster.helper import find_tiles
    from qc_tool.raster.helper import rasterize_mask
    from qc_tool.raster.helper import read_tile
    from qc_tool.raster.helper import write_progress
    from qc_tool.raster.helper import write_percent

    aoi_code = params["aoi_code"].lower()
    gap_value_ds = params["outside_area_code"]
    du_column_name = params.get("du_column_name", None)
    mask_align_grid = params.get("mask_align_grid", MASK_ALIGN_GRID)

    # Find the external boundary raster mask layer.
    raster_boundary_dir = params["boundary_dir"].joinpath("raster")
    vector_boundary_dir = params["boundary_dir"].joinpath("vector")
    mask_ident = "default"
    if "mask" in params:
        mask_ident = params["mask"]
    mask_ident = mask_ident.lower()

    for layer_def in do_raster_layers(params):

        # set this to true for writing partial progress to a text file.
        report_progress = True
        progress_filename = "{:s}_{:s}_progress.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        progress_filepath = params["output_dir"].joinpath(progress_filename)
        percent_filename = "{:s}_{:s}_percent.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        percent_filepath = params["output_dir"].joinpath(percent_filename)

        # get raster corners and resolution
        ds = gdal.Open(str(layer_def["src_filepath"]))
        ds_gt = ds.GetGeoTransform()
        ds_ulx = ds_gt[0]
        ds_xres = ds_gt[1]
        ds_uly = ds_gt[3]
        ds_yres = ds_gt[5]
        ds_lrx = ds_ulx + (ds.RasterXSize * ds_xres)
        ds_lry = ds_uly + (ds.RasterYSize * ds_yres)

        # Check availability of mask.

        # The mask is either retrieved from boundary package or it is generated using mask_ident, du_column_name and aoi_code.
        if mask_ident.endswith(".gpkg") or mask_ident.endswith(".shp"):
            mask_vector_filepath = vector_boundary_dir.joinpath(mask_ident)
            if not mask_vector_filepath.exists():
                status.info(
                    "Check cancelled due to boundary vector file {:s} not available."
                    .format(mask_ident))
                return
            mask_file = rasterize_mask(mask_vector_filepath, int(ds_xres),
                                       params["du_column_name"], aoi_code,
                                       mask_align_grid, ds_ulx, ds_uly,
                                       params["output_dir"])
        else:
            mask_file = raster_boundary_dir.joinpath(
                "mask_{:s}_{:03d}m_{:s}.tif".format(mask_ident, int(ds_xres),
                                                    aoi_code))

        if not mask_file.exists():
            status.info(
                "Check cancelled due to boundary mask file {:s} not available."
                .format(mask_file.name))
            return
        mask_ds = gdal.Open(str(mask_file))
        if mask_ds is None:
            status.info(
                "Check cancelled due to boundary mask file {:s} not available."
                .format(mask_file.name))
            return
        mask_band = mask_ds.GetRasterBand(1)
        nodata_value_mask = mask_band.GetNoDataValue()

        # get aoi mask corners and resolution
        mask_gt = mask_ds.GetGeoTransform()
        mask_ulx = mask_gt[0]
        mask_xres = mask_gt[1]
        mask_uly = mask_gt[3]
        mask_yres = mask_gt[5]
        mask_lrx = mask_ulx + (mask_ds.RasterXSize * mask_xres)
        mask_lry = mask_uly + (mask_ds.RasterYSize * mask_yres)

        # Check if the dataset extent intersects the mask extent.
        if (mask_ulx > ds_lrx or mask_uly < ds_lry or mask_lrx < ds_ulx
                or mask_lry > ds_uly):
            if mask_ident.endswith(".gpkg") or mask_ident.endswith(".shp"):
                extent_message = "Layer {:s} does not intersect any AOI polygon with {:s}={:s} from boundary {:s}."
                extent_message = extent_message.format(
                    layer_def["src_filepath"].name, du_column_name, aoi_code,
                    mask_ident)
            else:
                extent_message = "Layer {:s} does not intersect the AOI mask {:s}."
                extent_message = extent_message.format(
                    layer_def["src_layer_name"], mask_ident)
            extent_message += "Raster extent: [{:f} {:f}, {:f} {:f}]".format(
                ds_ulx, ds_uly, ds_lrx, ds_lry)
            extent_message += "AOI extent: [{:f} {:f}, {:f} {:f}]".format(
                mask_ulx, mask_uly, mask_lrx, mask_lry)
            status.info(extent_message)
            continue

        # Check if the raster and the AOI mask have the same resolution.
        if ds_xres != mask_xres or ds_yres != mask_yres:
            status.info(
                "Resolution of the raster [{:f}, {:f}] does not match "
                "the resolution [{:f}, {:f}] of the boundary mask {:s}.tif.".
                format(ds_xres, ds_yres, mask_xres, mask_yres, mask_ident))
            continue

        # Check if coordinates of the raster origin are whole integers.
        if not ds_ulx.is_integer() or not ds_uly.is_integer():
            status.info(
                "Coordinates of the raster origin ({:f}, {:f}) are not whole integers."
                .format(ds_ulx, ds_uly))
            continue

        # Check if origin of mask is aligned with origin of raster.
        if abs(ds_ulx - mask_ulx) % ds_xres > 0:
            status.info(
                "X coordinates of the raster are not exactly aligned with X coordinates of the boundary mask. "
                "Raster origin: {:f}, Mask origin: {:f}".format(
                    ds_ulx, mask_ulx))
            continue

        if abs(ds_uly - mask_uly) % ds_yres > 0:
            status.info(
                "Y coordinates of the raster are not exactly aligned with Y coordinates of the boundary mask. "
                "Raster origin: {:f}, Mask origin: {:f}".format(
                    ds_uly, mask_uly))
            continue

        if report_progress:
            msg = "ds_ulx: {:f} mask_ulx: {:f}".format(ds_ulx, mask_ulx)
            msg += "\nds_uly: {:f} mask_uly: {:f}".format(ds_uly, mask_uly)
            msg += "\nRasterXSize: {:d} RasterYSize: {:d}".format(
                ds.RasterXSize, ds.RasterYSize)
            write_progress(progress_filepath, msg)

        # Find the tiles
        tiles = find_tiles(ds, mask_ds)
        if report_progress:
            write_progress(progress_filepath,
                           "Number of tiles: {:d}".format(len(tiles)))

        # processing all the tiles:
        ds_band = ds.GetRasterBand(1)

        # retrieval of NoData value:
        if gap_value_ds == "NODATA":
            gap_value_ds = ds_band.GetNoDataValue()

        gap_count_total = 0
        num_tiles = len(tiles)
        gap_filepaths = []
        for tile_no, tile in enumerate(tiles):

            write_progress(
                progress_filepath,
                "Processing tile {}/{} ({}).".format(tile_no + 1, len(tiles),
                                                     tile.position))

            # reading the mask data into Numpy array
            mask_xoff = tile.x_offset
            mask_yoff = tile.y_offset
            blocksize_x = tile.ncols
            blocksize_y = tile.nrows
            arr_mask = mask_band.ReadAsArray(mask_xoff, mask_yoff, blocksize_x,
                                             blocksize_y)

            # If mask has all values unmapped then mask / raster comparison can be skipped.
            if numpy.max(arr_mask) == 0 or numpy.min(
                    arr_mask) == nodata_value_mask:
                write_progress(
                    progress_filepath,
                    "Tile {} has all values outside of mask, skipping.".format(
                        tile_no + 1))
                continue

            if tile.position == "outside":
                # Current tile is completely outside the bounds of the checked raster.
                write_progress(progress_filepath, "tile_position: outside.")
                arr_gaps = (arr_mask == 1)
            elif tile.position == "inside":
                # Current tile is completely inside the bounds of the checked raster.
                arr_ds = read_tile(ds, tile, gap_value_ds)
                arr_gaps = ((arr_mask == 1) * (arr_ds == gap_value_ds))
            else:
                # Current tile is partially inside and partially outside the bounds of the checked raster.
                arr_ds = read_tile(ds, tile, gap_value_ds)
                arr_gaps = ((arr_mask == 1) * (arr_ds == gap_value_ds))

            # find unmapped pixels inside mask
            gap_count = int(numpy.sum(arr_gaps))
            if gap_count > 0:

                # For each mask tile with gaps, create a new warning raster dataset.
                # These datasets can be merged or polygonized at the end of the run.
                src_stem = layer_def["src_filepath"].stem
                gap_ds_filename = "s{:02d}_{:s}_gap_warning_{:d}.tif".format(
                    params["step_nr"], src_stem, tile_no)
                gap_ds_filepath = params["tmp_dir"].joinpath(gap_ds_filename)
                driver = gdal.GetDriverByName('GTiff')
                gap_ds = driver.Create(str(gap_ds_filepath), blocksize_x,
                                       blocksize_y, 1, gdal.GDT_Byte,
                                       ['COMPRESS=LZW'])
                gap_ds.SetGeoTransform(
                    [tile.xmin, mask_xres, 0, tile.ymax, 0, mask_yres])
                gap_sr = osr.SpatialReference()
                gap_sr.ImportFromWkt(ds.GetProjectionRef())
                gap_ds.SetProjection(gap_sr.ExportToWkt())
                gap_band = gap_ds.GetRasterBand(1)
                gap_band.SetNoDataValue(0)
                gap_band.WriteArray(arr_gaps.astype("byte"), 0, 0)
                gap_ds.FlushCache()
                gap_ds = None
                gap_filepaths.append(str(gap_ds_filepath))
                gap_count_total += gap_count

            if report_progress:
                msg = "tile: {:d}/{:d} ({:s}), gaps: {:d}".format(
                    tile_no + 1, num_tiles, tile.position, gap_count)
                write_progress(progress_filepath, msg)
                progress_percent = int(100 * (tile_no / num_tiles))
                write_percent(percent_filepath, progress_percent)

        # Free memory for checked raster and for mask.
        ds = None
        mask_ds = None

        # Generate attachments.
        if gap_count_total > 0:
            # Merge previously generated tile gap rasters into a .vrt
            src_stem = layer_def["src_filepath"].stem
            warning_vrt_filename = "s{:02d}_{:s}_gap_warning.vrt".format(
                params["step_nr"], src_stem)
            warning_vrt_filepath = params["tmp_dir"].joinpath(
                warning_vrt_filename)

            if len(gap_filepaths) > 0:
                cmd = ["gdalbuildvrt", str(warning_vrt_filepath)]
                cmd = cmd + gap_filepaths
                write_progress(progress_filepath, " ".join(cmd))
                subprocess.run(cmd)
                status.info(
                    "Layer {:s} has {:d} gap pixels in the mapped area.".
                    format(layer_def["src_layer_name"], gap_count_total))

            # Convert the .vrt to a GeoTiff
            if warning_vrt_filepath.is_file():
                warning_tif_filename = "s{:02d}_{:s}_gap_warning.tif".format(
                    params["step_nr"], src_stem)
                warning_tif_filepath = params["output_dir"].joinpath(
                    warning_tif_filename)
                cmd = [
                    "gdal_translate", "-of", "GTiff", "-ot", "Byte", "-co",
                    "TILED=YES", "-co", "COMPRESS=LZW",
                    str(warning_vrt_filepath),
                    str(warning_tif_filepath)
                ]
                subprocess.run(cmd)
                status.add_attachment(warning_tif_filepath.name)
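
At its core, the per-tile gap test marks pixels that lie inside the AOI mask but still carry the gap value. A small numpy sketch with fake arrays (the check itself uses `*`, which is equivalent to `&` for boolean arrays):

import numpy

gap_value_ds = 0  # illustrative outside_area_code
arr_mask = numpy.array([[1, 1, 0],
                        [1, 1, 0],
                        [0, 0, 0]])
arr_ds = numpy.array([[10, 0, 0],
                      [12, 11, 0],
                      [0, 0, 0]])
arr_gaps = (arr_mask == 1) & (arr_ds == gap_value_ds)
print(int(numpy.sum(arr_gaps)))  # 1 gap pixel, at row 0, column 1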
Example No. 12
def run_check(params, status):
    import osgeo.gdal as gdal
    from qc_tool.raster.helper import do_raster_layers

    for layer_def in do_raster_layers(params):

        geotiff_name = layer_def["src_filepath"].name

        ds = gdal.Open(str(layer_def["src_filepath"]))
        band = ds.GetRasterBand(1)
        bit_depth = str(gdal.GetDataTypeName(band.DataType)).lower()

        # Colour tables can only be checked for specific bit depths.
        if bit_depth not in BIT_DEPTHS_WITH_COLORTABLE:
            status.info(
                "The raster {:s} has {:s} bit depth and thus it is not possible to check for a colour table."
                .format(layer_def["src_layer_name"], bit_depth))
            return

        # check the color table of the band
        ct = band.GetRasterColorTable()
        if ct is None:
            status.failed(
                "The raster {:s} has embedded color table missing.".format(
                    layer_def["src_layer_name"]))
            return

        # read-in the actual color table into a dictionary
        color_table_count = ct.GetCount()
        actual_colors = {}
        for i in range(0, color_table_count):
            entry = ct.GetColorEntry(i)
            if not entry:
                continue
            # converting a GDAL ColorEntry (r,g,b,a) tuple to a [r,g,b] list
            actual_colors[str(i)] = list(entry[0:3])

        # compare expected color table with the actual color table
        missing_codes = []
        incorrect_colors = []
        expected_colors = params["colors"]
        for code, color in expected_colors.items():
            if code not in actual_colors:
                missing_codes.append(code)
            elif expected_colors[code] != actual_colors[code]:
                incorrect_colors.append({
                    "class": code,
                    "expected": expected_colors[code],
                    "actual": actual_colors[code]
                })

        # report raster values with missing entries in the color table
        if len(missing_codes) > 0:
            status.failed(
                "The raster color table embedded in {:s} does not have entries for raster values {:s}."
                .format(geotiff_name, ", ".join(missing_codes)))
            continue

        # report color mismatches between expected and actual color table
        if len(incorrect_colors) > 0:
            color_reports = []
            for c in incorrect_colors:
                color_reports.append(
                    "value:{0}, expected RGB:{1}, actual RGB:{2}".format(
                        c["class"], c["expected"], c["actual"]))
            status.failed(
                "The raster color table has some incorrect colors. {:s}".
                format("; ".join(color_reports)))
            continue

        # Check existence of a .tif.clr or .clr file.
        clr_name1 = str(layer_def["src_filepath"]).replace(".tif", ".clr")
        clr_filepath1 = Path(clr_name1)
        clr_filename1 = clr_filepath1.name

        clr_name2 = str(layer_def["src_filepath"]).replace(".tif", ".tif.clr")
        clr_filepath2 = Path(clr_name2)
        clr_filename2 = clr_filepath2.name

        if clr_filepath1.is_file():
            clr_filepath = clr_filepath1
            clr_filename = clr_filename1
        elif clr_filepath2.is_file():
            clr_filepath = clr_filepath2
            clr_filename = clr_filename2
        else:
            status.failed(
                "The expected color table text file {:s} or {:s} is missing.".
                format(clr_filename1, clr_filename2))
            continue

        # read-in the actual tif.clr color table into a dictionary
        try:
            actual_colors = parse_clr_file(clr_filepath)
        except ValueError:
            status.failed(
                "The color table text file {:s} is in incorrect format.".
                format(clr_filename))
            continue

        # Check colors in .tif.clr file
        missing_codes = []
        incorrect_colors = []
        expected_colors = params["colors"]
        for code, color in expected_colors.items():
            if code not in actual_colors:
                missing_codes.append(code)
            elif expected_colors[code] != actual_colors[code]:
                incorrect_colors.append({
                    "class": code,
                    "expected": expected_colors[code],
                    "actual": actual_colors[code]
                })

        # report raster values with missing entries in the color table
        if len(missing_codes) > 0:
            status.failed(
                "The raster color table text file {:s} does not have entries for raster values {:s}."
                .format(clr_filename, ", ".join(missing_codes)))

        # report color mismatches between expected and actual color table
        if len(incorrect_colors) > 0:
            color_reports = []
            for c in incorrect_colors:
                color_reports.append(
                    "value:{0}, expected RGB:{1}, actual RGB:{2}".format(
                        c["class"], c["expected"], c["actual"]))
            status.failed(
                "The raster color text file {:s} has some incorrect colors. {:s}"
                .format(clr_filename, "; ".join(color_reports)))
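
The qc_tool helper parse_clr_file is not shown here. Purely as an illustration, a hypothetical parser for the common "value R G B" text layout could look like the sketch below, returning the same {code: [r, g, b]} mapping that the comparison expects (the real helper may behave differently):

def parse_clr_file_sketch(clr_filepath):
    # Hypothetical stand-in for parse_clr_file; assumes one "value R G B" entry per line.
    colors = {}
    with open(str(clr_filepath)) as clr_file:
        for line in clr_file:
            parts = line.split()
            if not parts:
                continue
            if len(parts) < 4:
                raise ValueError("Unexpected line in color table file: {!r}".format(line))
            colors[parts[0]] = [int(value) for value in parts[1:4]]
    return colors

For example, a file containing the line "1 230 0 77" would yield {"1": [230, 0, 77]}.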
Example No. 13
def run_check(params, status):
    import osgeo.gdal as gdal
    import skimage.measure as measure

    from qc_tool.raster.helper import do_raster_layers

    # set this to true for reporting partial progress to a _progress.txt file.
    report_progress = True

    # Determine if a "reclassify" step is required based on the "groupcodes" parameter setting.
    if "groupcodes" in params and params["groupcodes"] is not None and len(
            params["groupcodes"]) > 0:
        use_reclassify = True
    else:
        use_reclassify = False

    # NoData value can optionally be set as parameter
    # cell values with NODATA are excluded from MMU analysis.
    if "nodata_value" in params:
        NODATA = params["nodata_value"]
    else:
        NODATA = -1  # FIXME use a value that is outside of the range of possible raster values.

    # The optional report_exceptions parameter indicates whether any exceptions should be reported.
    report_exceptions = params.get("report_exceptions", True)

    # size of a raster tile. Should be a multiple of 256 because GeoTiff stores its data in 256*256 pixel blocks.
    BLOCKSIZE = 2048
    MMU = params["area_pixels"]

    # Some classes can optionally be excluded from MMU requirements.
    # Pixels belonging to these classes are reported as exceptions.
    if "value_exception_codes" in params:
        exclude_values = params["value_exception_codes"]
    else:
        exclude_values = []

    # neighbouring values to exclude.
    # patches with area < MMU which touch a patch having class in neighbour_exclude_values are reported
    # as exceptions.
    if "neighbour_exception_codes" in params:
        neighbour_exclude_values = params["neighbour_exception_codes"]
    else:
        neighbour_exclude_values = []

    for layer_def in do_raster_layers(params):
        progress_filename = "{:s}_{:s}_progress.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        progress_filepath = params["output_dir"].joinpath(progress_filename)
        percent_filename = "{:s}_{:s}_percent.txt".format(
            __name__.split(".")[-1], layer_def["src_filepath"].stem)
        percent_filepath = params["output_dir"].joinpath(percent_filename)

        # The checked raster is not read into memory as a whole. Instead,
        # ReadAsArray is used to read subsets of the raster (tiles) on demand.
        ds = gdal.Open(str(layer_def["src_filepath"]))

        nRasterCols = ds.RasterXSize
        nRasterRows = ds.RasterYSize

        # Tile buffer width. Must be bigger than the number of pixels in the MMU.
        buffer_width = MMU + 1

        # special handling of very small rasters - no need to split in multiple tiles
        if buffer_width >= nRasterCols:
            buffer_width = 0
        if nRasterCols <= BLOCKSIZE or nRasterRows <= BLOCKSIZE:
            buffer_width = 0

        blocksize_with_buffer = BLOCKSIZE + 2 * buffer_width

        # Detected patches with area<MMU will be stored in this list.
        regions_lessMMU = []

        # Exception patches with area<MMU and belonging to exclude class.
        regions_lessMMU_except = []

        nTileCols = int(math.ceil(nRasterCols / BLOCKSIZE))
        nTileRows = int(math.ceil(nRasterRows / BLOCKSIZE))
        last_col = nTileCols - 1
        last_row = nTileRows - 1

        if report_progress:
            msg = "processing {:d} tiles: {:d} rows, {:d} columns".format(
                nTileRows * nTileCols, nTileRows, nTileCols)
            write_progress(progress_filepath, msg)

        # TILES: ITERATE ROWS
        for tileRow in range(nTileRows):

            if report_progress:
                progress_percent = int(100 * ((tileRow + 1) / nTileRows))
                write_percent(percent_filepath, progress_percent)

            if tileRow == 0:
                # First row
                yOff = 0
                yOffInner = 0
                yOffRelative = 0
                block_height = BLOCKSIZE + buffer_width
                block_height_inner = BLOCKSIZE
            else:
                # Middle row
                yOff = tileRow * BLOCKSIZE
                yOffInner = yOff + buffer_width
                yOffRelative = buffer_width
                block_height = blocksize_with_buffer
                block_height_inner = BLOCKSIZE

            if tileRow == last_row:
                # Last row is a special case - block height must be adjusted.
                block_height = nRasterRows - yOff
                block_height_inner = block_height - buffer_width

            # if row tile only contains buffer zone and no inner zone, skip..
            if block_height <= buffer_width:
                continue

            # If we have already collected the maximum number of reportable patches < MMU, stop processing.
            if len(regions_lessMMU) > MAX_REPORTED_REGION_COUNT:
                break

            # TILES: ITERATE COLUMNS
            for tileCol in range(nTileCols):
                if tileCol == 0:
                    # First column
                    xOff = 0
                    xOffInner = 0
                    xOffRelative = 0
                    block_width = BLOCKSIZE + buffer_width
                    block_width_inner = BLOCKSIZE
                else:
                    # Middle column
                    xOff = tileCol * BLOCKSIZE
                    xOffInner = xOff + buffer_width
                    xOffRelative = buffer_width
                    block_width = blocksize_with_buffer
                    block_width_inner = BLOCKSIZE

                if tileCol == last_col:
                    # Last column is a special case - block width must be adjusted.
                    block_width = nRasterCols - xOff
                    block_width_inner = block_width - buffer_width

                # if column tile only contains buffer zone and no inner zone, skip..
                if block_width <= buffer_width:
                    continue

                # If we have already collected the maximum number of reportable patches < MMU, stop processing.
                if len(regions_lessMMU) > MAX_REPORTED_REGION_COUNT:
                    break

                # read whole array (with buffers)
                tile_buffered = ds.ReadAsArray(xOff, yOff, block_width,
                                               block_height)

                # special case: if the tile has all values equal then skip MMU checks.
                if tile_buffered.min() == tile_buffered.max():
                    if report_progress:
                        msg_tile = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h} all values same."
                        msg_tile = msg_tile.format(tr=tileRow,
                                                   ntr=nTileRows,
                                                   tc=tileCol,
                                                   w=block_width_inner,
                                                   h=block_height_inner)
                        write_progress(progress_filepath, msg_tile)
                    continue

                # reclassify inner tile array if some patches should be grouped together
                if use_reclassify:
                    # tile_inner = reclassify_values(tile_inner, params["groupcodes"])
                    tile_buffered = reclassify_values(tile_buffered,
                                                      params["groupcodes"])

                # inner tile is subset of buffered tile
                tile_inner = tile_buffered[yOffRelative:yOffRelative +
                                           block_height_inner,
                                           xOffRelative:xOffRelative +
                                           block_width_inner]

                # label the inner array and find patches < MMU
                labels_inner = measure.label(tile_inner,
                                             background=NODATA,
                                             connectivity=1)
                regions_inner = measure.regionprops(labels_inner)
                regions_inner_lessMMU = [
                    r for r in regions_inner if r.area < MMU
                ]

                # find lessMMU patches inside inner array not touching edge
                regions_lessMMU_edge = [
                    r for r in regions_inner_lessMMU
                    if r.bbox[0] == 0 or r.bbox[1] == 0 or r.bbox[2] ==
                    block_width_inner or r.bbox[3] == block_height_inner
                ]
                labels_lessMMU_edge = [r.label for r in regions_lessMMU_edge]

                regions_lessMMU_inside = [
                    r for r in regions_inner_lessMMU
                    if r.label not in labels_lessMMU_edge
                ]

                # progress reporting..
                if report_progress:
                    msg = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h}"
                    msg = msg.format(tr=tileRow,
                                     ntr=nTileRows,
                                     tc=tileCol,
                                     w=block_width_inner,
                                     h=block_height_inner)
                    if len(regions_lessMMU_inside) > 0:
                        msg += " found {:d} areas < MMU".format(
                            len(regions_lessMMU_inside))
                    write_progress(progress_filepath, msg)

                # inspect inner patches
                for r in regions_lessMMU_inside:
                    first_coord_x = r.coords[0][0]
                    first_coord_y = r.coords[0][1]
                    lessMMU_value = tile_inner[first_coord_x, first_coord_y]

                    # convert relative coords to absolute. coords are stored as [row, column].
                    absolute_coords = [[c[1] + xOffInner, c[0] + yOffInner]
                                       for c in r.coords]

                    lessMMU_info = {
                        "tileRow": tileRow,
                        "tileCol": tileCol,
                        "area": r.area,
                        "value": lessMMU_value,
                        "coords": absolute_coords
                    }
                    if lessMMU_value in exclude_values:
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    elif patch_touches_cell_with_value(
                            r.coords, tile_inner, neighbour_exclude_values):
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    elif patch_touches_raster_edge(absolute_coords,
                                                   nRasterRows, nRasterCols):
                        if report_exceptions:
                            regions_lessMMU_except.append(lessMMU_info)
                    else:
                        regions_lessMMU.append(lessMMU_info)

                # no need to read-in buffered tile if there are no suspect lessMMU patches at edge of inner tile
                if len(regions_lessMMU_edge) == 0:
                    continue
                elif report_progress:
                    msg = "tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h} INSPECTING EDGE PATCHES"
                    msg = msg.format(tr=tileRow,
                                     ntr=nTileRows,
                                     tc=tileCol,
                                     w=block_width,
                                     h=block_height)
                    write_progress(progress_filepath, msg)

                # processing the outer array expanded by buffer with width=number of pixels in MMU

                # optimization: set pixels not within buffer zone (deep inside outer array) to background.
                inner_buf_startcol = xOffRelative + MMU
                inner_buf_startrow = yOffRelative + MMU
                inner_buf_endcol = xOffRelative + block_width_inner - MMU
                inner_buf_endrow = yOffRelative + block_height_inner - MMU
                if inner_buf_endcol > inner_buf_startcol and inner_buf_endrow > inner_buf_startrow:
                    tile_buffered[inner_buf_startrow:inner_buf_endrow,
                                  inner_buf_startcol:inner_buf_endcol] = NODATA

                labels_buf = measure.label(tile_buffered,
                                           background=NODATA,
                                           connectivity=1)
                buf_regions = measure.regionprops(labels_buf)
                buf_regions_small = [r for r in buf_regions if r.area < MMU]
                buf_labels_small = [r.label for r in buf_regions_small]

                # edge_regions_small is used for reporting only.
                edge_regions_small = []

                for r in regions_lessMMU_edge:
                    first_coord_x = r.coords[0][0]
                    first_coord_y = r.coords[0][1]
                    val = tile_inner[first_coord_x, first_coord_y]

                    # get corresponding value of tile edge patch in buffered array..
                    # if the inner tile edge patch has area < MMU also in the expanded tile, report it.
                    coord_x_buf = first_coord_x + xOffRelative
                    coord_y_buf = first_coord_y + yOffRelative
                    lbl_buf = labels_buf[coord_x_buf, coord_y_buf]
                    if lbl_buf in buf_labels_small:
                        r_buf = buf_regions[lbl_buf - 1]
                        edge_regions_small.append(r)

                        # coordinates are specified as row, column..
                        # convert [row in tile, column in tile] to [source raster column, source raster row]
                        absolute_coords = [[c[1] + xOff, c[0] + yOff]
                                           for c in r_buf.coords]

                        lessMMU_info = {
                            "tileRow": tileRow,
                            "tileCol": tileCol,
                            "area": r_buf.area,
                            "value": val,
                            "coords": absolute_coords
                        }

                        # handling special cases (exception patches)
                        if val in exclude_values:
                            regions_lessMMU_except.append(lessMMU_info)
                        elif patch_touches_cell_with_value(
                                r_buf.coords, tile_buffered,
                                neighbour_exclude_values):
                            regions_lessMMU_except.append(lessMMU_info)
                        elif patch_touches_raster_edge(absolute_coords,
                                                       nRasterRows,
                                                       nRasterCols):
                            regions_lessMMU_except.append(lessMMU_info)
                        else:
                            regions_lessMMU.append(lessMMU_info)

                if report_progress and len(edge_regions_small) > 0:
                    # report actual edge regions < MMU after applying buffer
                    msg = "xOff {:d} yOff {:d} xOffInner {:d} yOffInner {:d}".format(
                        xOff, yOff, xOffInner, yOffInner)
                    write_progress(progress_filepath, msg)
                    msg = "BUFFER: tileRow: {tr}/{ntr} tileCol: {tc} width: {w} height: {h}"
                    msg = msg.format(tr=tileRow,
                                     ntr=nTileRows,
                                     tc=tileCol,
                                     w=block_width,
                                     h=block_height)
                    num_edge_patches = len(edge_regions_small)
                    msg += " found {:d} edge areas < MMU in tile {:d}, {:d}".format(
                        num_edge_patches, tileRow, tileCol)
                    write_progress(progress_filepath, msg)

        # Export errors and exceptions to geopackage.
        # The geopackage contains one sample point from each lessMMU patch.

        ## lessMMU patches belonging to one of exclude_values classes are reported as exceptions.
        if report_exceptions and len(regions_lessMMU_except) > 0:
            gpkg_filename = "s{:02d}_{:s}_lessmmu_exception.gpkg".format(
                params["step_nr"], layer_def["src_filepath"].stem)
            gpkg_filepath = params["output_dir"].joinpath(gpkg_filename)
            export(regions_lessMMU_except, ds, gpkg_filepath,
                   MAX_REPORTED_REGION_COUNT)
            status.add_attachment(gpkg_filename)
            if len(regions_lessMMU_except) <= MAX_REPORTED_REGION_COUNT:
                status.info(
                    "The data source has {:d} exceptional objects under MMU limit of {:d} pixels."
                    .format(len(regions_lessMMU_except),
                            params["area_pixels"]))
            else:
                status.info(
                    "The data source has more than {:d} exceptional objects under MMU limit of {:d} pixels."
                    .format(MAX_REPORTED_REGION_COUNT, params["area_pixels"]))

        ## lessMMU patches not belonging to exclude_values are reported as errors.
        if len(regions_lessMMU) > 0:
            gpkg_filename = "s{:02d}_{:s}_lessmmu_error.gpkg".format(
                params["step_nr"], layer_def["src_filepath"].stem)
            gpkg_filepath = params["output_dir"].joinpath(gpkg_filename)
            export(regions_lessMMU, ds, gpkg_filepath,
                   MAX_REPORTED_REGION_COUNT)
            status.add_attachment(gpkg_filename)
            if len(regions_lessMMU) <= MAX_REPORTED_REGION_COUNT:
                status.failed(
                    "The data source has {:d} error objects under MMU limit of {:d} pixels."
                    .format(len(regions_lessMMU), params["area_pixels"]))
            else:
                status.failed(
                    "The data source has more than {:d} error objects under MMU limit of {:d} pixels."
                    .format(MAX_REPORTED_REGION_COUNT, params["area_pixels"]))
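
The MMU test itself is based on connected-component labelling. A tiny self-contained sketch (assuming numpy and scikit-image; MMU, NODATA and the 3x3 tile are illustrative values):

import numpy
import skimage.measure as measure

MMU = 3       # illustrative minimum patch size in pixels
NODATA = -1   # illustrative background value
tile = numpy.array([[1, 1, 2],
                    [1, 1, 2],
                    [3, 3, 3]])

labels = measure.label(tile, background=NODATA, connectivity=1)  # 4-connectivity
small_patches = [(int(tile[tuple(r.coords[0])]), int(r.area))
                 for r in measure.regionprops(labels) if r.area < MMU]
print(small_patches)  # [(2, 2)]: the two-pixel patch of class 2 is below the MMU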