def vector_get_attribute_table(
    vector: Union[str, ogr.DataSource],
    process_layer: int = 0,
    include_geom: bool = False,
) -> pd.DataFrame:
    """Read the attribute table of one vector layer into a pandas DataFrame.

    Args:
        vector (path | vector): The vector to read.

    **kwargs:
        process_layer (int): Index of the layer to read.

        include_geom (bool): If True, a trailing "geom" column holding the
        ISO WKT of each feature geometry is added (None when a feature has
        no geometry).

    Returns:
        A DataFrame with a leading "fid" column, one column per field of
        the layer and, optionally, the "geom" column.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(process_layer, [int], "process_layer")
    type_check(include_geom, [bool], "include_geom")

    ref = open_vector(vector)
    metadata = internal_vector_to_metadata(
        ref, process_layer=process_layer, create_geometry=False
    )

    layer_meta = metadata["layers"][process_layer]
    # Copy the names: the header is extended below and the metadata
    # dictionary must not be modified as a side effect.
    field_names = list(layer_meta["field_names"])
    feature_count = layer_meta["feature_count"]

    layer = ref.GetLayer(process_layer)
    # An already opened datasource may have a partially consumed cursor.
    layer.ResetReading()

    attribute_table = []
    for _ in range(feature_count):
        feature = layer.GetNextFeature()
        if feature is None:
            break

        row = [feature.GetFID()]
        row.extend(feature.GetField(field_name) for field_name in field_names)

        if include_geom:
            geom = feature.GetGeometryRef()
            row.append(None if geom is None else geom.ExportToIsoWkt())

        attribute_table.append(row)

    layer.ResetReading()

    columns = ["fid"] + field_names
    if include_geom:
        columns.append("geom")

    return pd.DataFrame(attribute_table, columns=columns)
def internal_singlepart_to_multipart(
    vector: Union[str, ogr.DataSource],
    out_path: Optional[str] = None,
    overwrite: bool = True,
    add_index: bool = True,
    process_layer: int = -1,
) -> str:
    """Collect the geometries of each layer into a single geometry.

    For every processed layer an SQL query (SQLITE dialect) runs
    ST_Collect over the geometry column and the result is copied to the
    output datasource under the original layer name.

    Args:
        vector (path | vector): The vector to convert.

    **kwargs:
        out_path (path | None): Destination path, forwarded to
        ready_io_vector which resolves the final output name.

        overwrite (bool): Is it possible to overwrite the out_path if it
        exists.

        add_index (bool): If True, vector_add_index is called on the output.

        process_layer (int): Index of the layer to process. -1 processes
        all layers.

    Returns:
        The path of the output vector.

    Raises:
        Exception: If the SQL query for a layer does not return a result.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(overwrite, [bool], "overwrite")
    type_check(add_index, [bool], "add_index")
    type_check(process_layer, [int], "process_layer")

    vector_list, path_list = ready_io_vector(vector, out_path, overwrite=overwrite)

    ref = open_vector(vector_list[0])
    out_name = path_list[0]

    driver = ogr.GetDriverByName(path_to_driver_vector(out_name))

    overwrite_required(out_name, overwrite)

    metadata = internal_vector_to_metadata(ref)

    remove_if_overwrite(out_name, overwrite)

    destination: ogr.DataSource = driver.CreateDataSource(out_name)

    for index, layer_meta in enumerate(metadata["layers"]):
        if process_layer != -1 and index != process_layer:
            continue

        name = layer_meta["layer_name"]
        geom = layer_meta["column_geom"]

        sql = f"SELECT ST_Collect({geom}) AS geom FROM {name};"

        result = ref.ExecuteSQL(sql, dialect="SQLITE")
        if result is None:
            raise Exception(f"Unable to collect geometries of layer: {name}")

        try:
            destination.CopyLayer(result, name, ["OVERWRITE=YES"])
        finally:
            # Result sets returned by ExecuteSQL must be handed back to the
            # datasource that created them.
            ref.ReleaseResultSet(result)

    if add_index:
        vector_add_index(destination)

    destination.FlushCache()

    return out_name
def merge_vectors(
    vectors: List[Union[str, ogr.DataSource]],
    out_path: Optional[str] = None,
    preserve_fid: bool = True,
) -> str:
    """Merge vectors to a single datasource (geopackage by default).

    Every layer of every input vector is copied to the output under its
    own name with OVERWRITE=YES, so a later layer replaces an earlier one
    that carries the same name.

    Args:
        vectors (list of paths | vectors): The vectors to merge.

    **kwargs:
        out_path (path | None): Destination path. If None an in-memory
        (/vsimem/) geopackage is created.

        preserve_fid (bool): Type checked, but currently not used by the
        implementation.

    Returns:
        The path of the merged vector.
    """
    # The label is used in error messages and must name the real parameter.
    type_check(vectors, [list], "vectors")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(preserve_fid, [bool], "preserve_fid")

    vector_list = to_vector_list(vectors)

    if out_path is None:
        out_driver = "GPKG"
        out_target = f"/vsimem/clipped_{uuid4().int}.gpkg"
    else:
        out_driver = path_to_driver_vector(out_path)
        out_target = out_path

    driver = ogr.GetDriverByName(out_driver)
    merged_ds: ogr.DataSource = driver.CreateDataSource(out_target)

    for vector in vector_list:
        ref = open_vector(vector)
        metadata = internal_vector_to_metadata(ref)

        for layer in metadata["layers"]:
            name = layer["layer_name"]
            merged_ds.CopyLayer(ref.GetLayer(name), name, ["OVERWRITE=YES"])

    merged_ds.FlushCache()

    return out_target
def vector_get_fids(
    vector: Union[str, ogr.DataSource], process_layer: int = 0
) -> np.ndarray:
    """Get the feature ids (FIDs) of a vector layer.

    Args:
        vector (path | vector): The vector to read.

    **kwargs:
        process_layer (int): Index of the layer to read the FIDs from.

    Returns:
        An integer numpy array with the FIDs in reading order.

    Raises:
        Exception: If the requested layer does not exist.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(process_layer, [int], "process_layer")

    metadata = internal_vector_to_metadata(vector)

    ref = open_vector(vector)
    layer = ref.GetLayer(process_layer)

    if layer is None:
        raise Exception(f"Requested a non-existing layer: layer_idx={process_layer}")

    # The count must come from the layer that is actually iterated, not
    # from the first layer of the datasource.
    features = metadata["layers"][process_layer]["feature_count"]

    # Start from the first feature even if the layer was read before.
    layer.ResetReading()

    fid_list = np.empty(features, dtype=int)

    for index in range(features):
        feature = layer.GetNextFeature()
        fid_list[index] = feature.GetFID()

    layer.ResetReading()

    return fid_list
def extract_patches(
    raster_list,
    outdir,
    tile_size=32,
    zones=None,
    options=None,
):
    """Generate patches for machine learning from rasters.

    Every raster is clipped to each zone feature, cut into square tiles and
    saved as numpy arrays in outdir together with the matching validity
    masks and, when options["label_geom"] is given, the label tiles.

    Args:
        raster_list (raster | list of rasters): The rasters to extract from.

        outdir (path): Existing folder that receives the outputs.

    **kwargs:
        tile_size (int): Side length of the tiles in pixels.

        zones (vector | None): Polygon zones to extract within. If None the
        extent of the first raster is used.

        options (dict | None): Overrides for the defaults declared in
        base_options below. Unknown keys raise a ValueError.

    Returns:
        1 when the extraction is finished.

    Raises:
        TypeError: If zones, the rasters or the label geometry are invalid.
        ValueError: If an option is unknown, outdir does not exist, the
        rasters are unaligned (and force_align is False) or the zones are
        not polygons.
        Exception: If a clipped raster and its mask differ in shape.
    """
    base_options = {
        "overlaps": True,
        "border_check": True,
        "merge_output": True,
        "force_align": True,
        "output_raster_labels": True,
        "label_geom": None,
        "label_res": 0.2,
        "label_mult": 100,
        "tolerance": 0.0,
        "fill_value": 0,
        "zone_layer_id": 0,
        "align_with_size": 20,
        "prefix": "",
        "postfix": "",
    }

    if options is not None:
        for key in options:
            if key not in base_options:
                raise ValueError(f"Invalid option: {key}")
            base_options[key] = options[key]
    options = base_options

    if zones is not None and not is_vector(zones):
        raise TypeError(
            "Clip geom is invalid. Did you input a valid geometry?")

    if not isinstance(raster_list, list):
        raster_list = [raster_list]

    for raster in raster_list:
        if not is_raster(raster):
            raise TypeError("raster_list is not a list of rasters.")

    if not os.path.isdir(outdir):
        raise ValueError(
            "Outdir does not exist. Please create before running the function."
        )

    if not rasters_are_aligned(raster_list, same_extent=True):
        if options["force_align"]:
            print(
                "Rasters we not aligned. Realigning rasters due to force_align=True option."
            )
            raster_list = align_rasters(raster_list)
        else:
            raise ValueError("Rasters in raster_list are not aligned.")

    offsets = get_offsets(tile_size) if options["overlaps"] else [[0, 0]]
    raster_metadata = raster_to_metadata(raster_list[0], create_geometry=True)
    pixel_size = min(raster_metadata["pixel_height"], raster_metadata["pixel_width"])

    if zones is None:
        zones = raster_metadata["extent_datasource_path"]

    zones_meta = vector_to_metadata(zones)

    mem_driver = ogr.GetDriverByName("ESRI Shapefile")

    if zones_meta["layer_count"] == 0:
        raise ValueError("Vector contains no layers.")

    zones_layer_meta = zones_meta["layers"][options["zone_layer_id"]]

    if zones_layer_meta["geom_type"] not in ["Multi Polygon", "Polygon"]:
        raise ValueError("clip geom is not Polygon or Multi Polygon.")

    zones_ogr = open_vector(zones)
    zones_layer = zones_ogr.GetLayer(options["zone_layer_id"])
    feature_defn = zones_layer.GetLayerDefn()
    fids = vector_get_fids(zones_ogr, options["zone_layer_id"])

    prefix = options["prefix"]
    postfix = options["postfix"]
    fill_value = options["fill_value"]
    has_labels = options["label_geom"] is not None
    total_steps = len(fids) * len(raster_list)

    progress(0, total_steps, "processing fids")
    processed_fids = []
    processed = 0
    labels_processed = False

    for idx, raster in enumerate(raster_list):
        name = os.path.splitext(os.path.basename(raster))[0]
        list_extracted = []
        list_masks = []
        list_labels = []

        # Labels are identical for every raster, so they are only exported
        # together with the first raster.
        export_labels = has_labels and not labels_processed

        for fid in fids:
            feature = zones_layer.GetFeature(fid)
            geom = feature.GetGeometryRef()
            fid_path = f"/vsimem/fid_mem_{uuid4().int}_{str(fid)}.shp"
            fid_ds = mem_driver.CreateDataSource(fid_path)
            fid_ds_lyr = fid_ds.CreateLayer(
                "fid_layer",
                geom_type=ogr.wkbPolygon,
                srs=zones_layer_meta["projection_osr"],
            )
            copied_feature = ogr.Feature(feature_defn)
            copied_feature.SetGeometry(geom)
            fid_ds_lyr.CreateFeature(copied_feature)

            fid_ds.FlushCache()
            fid_ds.SyncToDisk()

            valid_path = f"/vsimem/{prefix}validmask_{str(fid)}{postfix}.tif"

            rasterize_vector(
                fid_path,
                pixel_size,
                out_path=valid_path,
                extent=fid_path,
            )
            valid_arr = raster_to_array(valid_path)

            # Generated unconditionally: the clipped raster path below needs
            # it even when no label geometry is supplied.
            run_id = str(uuid4().int)

            # Temporary label files created for this fid; tracked so the
            # cleanup only touches files that exist.
            label_paths = []

            if has_labels and fid not in processed_fids:
                if not is_vector(options["label_geom"]):
                    raise TypeError(
                        "label geom is invalid. Did you input a valid geometry?"
                    )

                label_clip_path = f"/vsimem/fid_{run_id}_{str(fid)}_clipped.shp"
                label_ras_path = f"/vsimem/fid_{run_id}_{str(fid)}_rasterized.tif"
                label_warp_path = f"/vsimem/fid_{run_id}_{str(fid)}_resampled.tif"
                label_paths = [label_clip_path, label_ras_path, label_warp_path]

                intersect_vector(options["label_geom"], fid_ds, out_path=label_clip_path)

                try:
                    rasterize_vector(
                        label_clip_path,
                        options["label_res"],
                        out_path=label_ras_path,
                        extent=valid_path,
                    )
                except Exception:
                    # Rasterizing failed; fall back to an all-zero label
                    # raster matching the valid mask.
                    array_to_raster(
                        np.zeros(valid_arr.shape, dtype="float32"),
                        valid_path,
                        out_path=label_ras_path,
                    )

                resample_raster(
                    label_ras_path,
                    pixel_size,
                    resample_alg="average",
                    out_path=label_warp_path,
                )

                labels_arr = (raster_to_array(label_warp_path) * options["label_mult"]).astype("float32")

                if options["output_raster_labels"]:
                    array_to_raster(
                        labels_arr,
                        label_warp_path,
                        out_path=os.path.join(
                            outdir, f"{prefix}label_{str(fid)}{postfix}.tif"
                        ),
                    )

            raster_clip_path = f"/vsimem/raster_{run_id}_{str(idx)}_clipped.tif"

            try:
                clip_raster(
                    raster,
                    valid_path,
                    raster_clip_path,
                    all_touch=False,
                    adjust_bbox=False,
                )
            except Exception as e:
                print(
                    f"Warning: {raster} did not intersect geom with fid: {fid}."
                )
                print(e)

                for temp_path in label_paths:
                    gdal.Unlink(temp_path)
                gdal.Unlink(fid_path)
                gdal.Unlink(valid_path)

                continue

            arr = raster_to_array(raster_clip_path)

            if arr.shape[:2] != valid_arr.shape[:2]:
                raise Exception(
                    f"Error while matching array shapes. Raster: {arr.shape}, Valid: {valid_arr.shape}"
                )

            arr_offsets = get_overlaps(arr, offsets, tile_size, options["border_check"])

            arr = np.concatenate(arr_offsets)
            valid_offsets = np.concatenate(
                get_overlaps(valid_arr, offsets, tile_size, options["border_check"]))

            # Keep tiles whose share of invalid pixels is within tolerance.
            valid_mask = ((1 - (valid_offsets.sum(axis=(1, 2)) / (tile_size * tile_size))) <= options["tolerance"])[:, 0]

            arr = arr[valid_mask]
            valid_masked = valid_offsets[valid_mask]

            if export_labels:
                labels_masked = np.concatenate(
                    get_overlaps(labels_arr, offsets, tile_size, options["border_check"]))[valid_mask]

            if options["merge_output"]:
                list_extracted.append(arr)
                list_masks.append(valid_masked)

                if export_labels:
                    list_labels.append(labels_masked)
            else:
                np.save(
                    os.path.join(outdir, f"{prefix}{str(fid)}_{name}{postfix}.npy"),
                    arr.filled(fill_value),
                )

                np.save(
                    os.path.join(outdir, f"{prefix}{str(fid)}_mask_{name}{postfix}.npy"),
                    valid_masked.filled(fill_value),
                )

                if export_labels:
                    # Save the label tiles, not the mask tiles.
                    np.save(
                        os.path.join(outdir, f"{prefix}{str(fid)}_label_{name}{postfix}.npy"),
                        labels_masked.filled(fill_value),
                    )

            if fid not in processed_fids:
                processed_fids.append(fid)

            processed += 1
            progress(processed, total_steps, "processing fids")

            for temp_path in label_paths:
                gdal.Unlink(temp_path)
            gdal.Unlink(fid_path)
            gdal.Unlink(valid_path)

        if options["merge_output"]:
            np.save(
                os.path.join(outdir, f"{prefix}{name}{postfix}.npy"),
                np.ma.concatenate(list_extracted).filled(fill_value),
            )
            np.save(
                os.path.join(outdir, f"{prefix}mask_{name}{postfix}.npy"),
                np.ma.concatenate(list_masks).filled(fill_value),
            )

            if export_labels:
                np.save(
                    os.path.join(outdir, f"{prefix}label_{name}{postfix}.npy"),
                    np.ma.concatenate(list_labels).filled(fill_value),
                )

        # After the first raster the labels are complete in both the merged
        # and the per-fid output mode.
        labels_processed = True

    progress(1, 1, "processing fids")

    return 1
def internal_reproject_vector(
    vector: Union[str, ogr.DataSource],
    projection: Union[str, int, ogr.DataSource, gdal.Dataset, osr.SpatialReference],
    out_path: Optional[str] = None,
    copy_if_same: bool = False,
    overwrite: bool = True,
) -> str:
    """Reproject a vector to a target projection.

    Args:
        vector (path | vector): The vector to reproject.

        projection (str | int | vector | raster | osr.SpatialReference):
        The target projection, parsed by parse_projection.

    **kwargs:
        out_path (path | None): Destination path, forwarded to
        ready_io_vector which resolves the final output name.

        copy_if_same (bool): If the projections already match, return a
        copy (in memory when out_path is None, on disk otherwise) instead
        of the path of the input.

        overwrite (bool): Is it possible to overwrite the out_path if it
        exists.

    Returns:
        The path of the reprojected vector (or of the input/copy when the
        projections are already the same).

    Raises:
        Exception: If the target projection cannot be parsed.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(
        projection,
        [str, int, ogr.DataSource, gdal.Dataset, osr.SpatialReference],
        "projection",
    )
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(copy_if_same, [bool], "copy_if_same")
    type_check(overwrite, [bool], "overwrite")

    vector_list, path_list = ready_io_vector(vector, out_path, overwrite=overwrite)

    origin = open_vector(vector_list[0])
    metadata = internal_vector_to_metadata(origin)
    out_name = path_list[0]

    origin_projection = metadata["projection_osr"]
    target_projection = parse_projection(projection)

    if not isinstance(target_projection, osr.SpatialReference):
        raise Exception("Error ")

    if origin_projection.IsSame(target_projection):
        if copy_if_same:
            if out_path is None:
                return internal_vector_to_memory(origin)

            return internal_vector_to_disk(origin, out_name)
        else:
            return get_vector_path(vector)

    # GDAL 3 changes axis order: https://github.com/OSGeo/gdal/issues/1546
    # Parse the whole major version so two-digit majors compare correctly.
    if int(osgeo.__version__.split(".")[0]) >= 3:
        origin_projection.SetAxisMappingStrategy(
            osr.OAMS_TRADITIONAL_GIS_ORDER)
        target_projection.SetAxisMappingStrategy(
            osr.OAMS_TRADITIONAL_GIS_ORDER)

    coord_trans = osr.CoordinateTransformation(origin_projection,
                                               target_projection)

    # out_name is always a resolved path, whereas out_path may be None.
    remove_if_overwrite(out_name, overwrite)

    driver = ogr.GetDriverByName(path_to_driver_vector(out_name))

    destination: ogr.DataSource = driver.CreateDataSource(out_name)

    for layer_idx, layer_dict in enumerate(metadata["layers"]):
        origin_layer = origin.GetLayerByIndex(layer_idx)
        origin_layer_defn = origin_layer.GetLayerDefn()

        layer_name = layer_dict["layer_name"]
        layer_geom_type = layer_dict["geom_type_ogr"]

        destination_layer = destination.CreateLayer(layer_name,
                                                    target_projection,
                                                    layer_geom_type)
        destination_layer_defn = destination_layer.GetLayerDefn()

        # Copy field definitions
        for i in range(origin_layer_defn.GetFieldCount()):
            destination_layer.CreateField(origin_layer_defn.GetFieldDefn(i))

        field_count = destination_layer_defn.GetFieldCount()

        # Loop through the input features from the start of the layer.
        origin_layer.ResetReading()
        feature = origin_layer.GetNextFeature()
        while feature is not None:
            new_feature = ogr.Feature(destination_layer_defn)

            geom = feature.GetGeometryRef()
            if geom is not None:
                # Transform a copy so the source feature is left untouched.
                transformed = geom.Clone()
                transformed.Transform(coord_trans)
                new_feature.SetGeometry(transformed)

            # Copy field values
            for i in range(field_count):
                new_feature.SetField(
                    destination_layer_defn.GetFieldDefn(i).GetNameRef(),
                    feature.GetField(i),
                )

            destination_layer.CreateFeature(new_feature)
            feature = origin_layer.GetNextFeature()

        origin_layer.ResetReading()
        destination_layer.ResetReading()
        destination_layer = None

    destination.FlushCache()

    return out_name
def internal_clip_vector(
    vector: Union[str, ogr.DataSource],
    clip_geom: Union[str, ogr.DataSource, gdal.Dataset],
    out_path: Optional[str] = None,
    process_layer: int = 0,
    process_layer_clip: int = 0,
    to_extent: bool = False,
    target_projection: Optional[Union[str, ogr.DataSource, gdal.Dataset,
                                      osr.SpatialReference, int]] = None,
    preserve_fid: bool = True,
) -> str:
    """Clips a vector to a geometry.

    Args:
        vector (path | vector): The vector to clip.

        clip_geom (path | vector | raster): The geometry to clip with. For
        rasters (and for vectors when to_extent is True) the extent is used.

    **kwargs:
        out_path (path | None): Destination path. If None an in-memory
        (/vsimem/) geopackage is created.

        process_layer (int): Layer of the vector to clip.

        process_layer_clip (int): Layer of clip_geom to clip with.

        to_extent (bool): Clip to the extent of clip_geom instead of its
        geometries.

        target_projection (str | int | vector | raster | osr.SpatialReference):
        Optional projection of the output.

        preserve_fid (bool): Keep the FIDs of the input (-preserve_fid),
        otherwise -unsetFid is passed to the translation.

    Returns:
        The path to the clipped vector.

    Raises:
        ValueError: If clip_geom is neither a vector nor a raster.
        Exception: If the clipping fails.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(clip_geom, [ogr.DataSource, gdal.Dataset, str, list, tuple],
               "clip_geom")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(process_layer, [int], "process_layer")
    type_check(process_layer_clip, [int], "process_layer_clip")
    type_check(to_extent, [bool], "to_extent")
    type_check(
        target_projection,
        [str, ogr.DataSource, gdal.Dataset, osr.SpatialReference, int],
        "target_projection",
        allow_none=True,
    )
    type_check(preserve_fid, [bool], "preserve_fid")

    if out_path is None:
        # The format must be a driver name, not a file extension.
        out_driver = "GPKG"
        out_target = f"/vsimem/clipped_{uuid4().int}.gpkg"
    else:
        out_driver = path_to_driver_vector(out_path)
        out_target = out_path

    geometry_to_clip = None
    if is_vector(clip_geom):
        if to_extent:
            extent = internal_vector_to_metadata(
                clip_geom, create_geometry=True)["extent_datasource"]
            geometry_to_clip = internal_vector_to_memory(extent)
        else:
            geometry_to_clip = open_vector(clip_geom, layer=process_layer_clip)
    elif is_raster(clip_geom):
        extent = internal_raster_to_metadata(
            clip_geom, create_geometry=True)["extent_datasource"]
        geometry_to_clip = internal_vector_to_memory(extent)
    else:
        raise ValueError(
            f"Invalid input in clip_geom, unable to parse: {clip_geom}")

    clip_vector_path = internal_vector_to_metadata(geometry_to_clip)["path"]

    # Options are passed as a list of tokens so that paths and WKT strings
    # containing spaces or quotes are not split apart.
    options = ["-clipsrc", clip_vector_path]

    if preserve_fid:
        options.append("-preserve_fid")
    else:
        options.append("-unsetFid")

    if target_projection is not None:
        out_projection = parse_projection(target_projection, return_wkt=True)
        options.extend(["-t_srs", out_projection])

    origin = open_vector(vector, layer=process_layer)

    # dst  # src
    result = gdal.VectorTranslate(
        out_target,
        get_vector_path(origin),
        format=out_driver,
        options=options,
    )

    # VectorTranslate returns the output dataset, or None when it fails.
    if result is None:
        raise Exception("Error while clipping geometry.")

    # Dereference to close the dataset and flush it to out_target.
    result = None

    return out_target
def extract_patches(
    raster: Union[List[Union[str, gdal.Dataset]], str, gdal.Dataset],
    out_dir: Optional[str] = None,
    prefix: str = "",
    postfix: str = "_patches",
    size: int = 32,
    offsets: Union[list, None] = [],
    generate_border_patches: bool = True,
    generate_zero_offset: bool = True,
    generate_grid_geom: bool = True,
    clip_geom: Optional[Union[str, ogr.DataSource, gdal.Dataset]] = None,
    clip_layer_index: int = 0,
    verify_output=True,
    verification_samples=100,
    overwrite=True,
    epsilon: float = 1e-9,
    verbose: int = 1,
) -> tuple:
    """Extracts square tiles from a raster.

    Args:
        raster (list of rasters | path | raster): The raster(s) to convert.

    **kwargs:
        out_dir (path | none): Folder to save output. If None, in-memory
        arrays and geometries are outputted.

        prefix (str): A prefix for all outputs.

        postfix (str): A postfix for all outputs.

        size (int): The size of the tiles in pixels.

        offsets (list of tuples): List of offsets to extract. Example:
        offsets=[(16, 16), (16, 0), (0, 16)]. Will offset the initial raster
        and extract from there. The list is never modified.

        generate_border_patches (bool): The tiles often do not align with the
        rasters which means borders are trimmed somewhat. If
        generate_border_patches is True, an additional tile is added where
        needed.

        generate_zero_offset (bool): if True, an offset is inserted at (0, 0)
        if none is present.

        generate_grid_geom (bool): Output a geopackage with the grid of tiles.

        clip_geom (str, raster, vector): Clip the output to the
        intersections with a geometry. Useful if a lot of the target area
        is water or similar.

        clip_layer_index (int): The layer of clip_geom to clip with.

        verify_output (bool): If True (and generate_grid_geom is True) the
        result is checked with test_extraction.

        verification_samples (int): Number of samples passed to
        test_extraction.

        overwrite (bool): Is it possible to overwrite existing outputs.

        epsilon (float): How much to buffer the arange array function. This
        should usually just be left alone.

        verbose (int): If 1 will output messages on progress.

    Returns:
        A tuple with paths to the generated items. (numpy_array, grid_geom)

    Raises:
        ValueError: If out_dir does not exist, the rasters are unaligned or
        an offset is malformed.
    """
    type_check(raster, [str, list, gdal.Dataset], "raster")
    type_check(out_dir, [str], "out_dir", allow_none=True)
    type_check(prefix, [str], "prefix")
    type_check(postfix, [str], "postfix")
    type_check(size, [int], "size")
    type_check(offsets, [list], "offsets", allow_none=True)
    type_check(generate_grid_geom, [bool], "generate_grid_geom")
    type_check(
        clip_geom,
        [str, ogr.DataSource, gdal.Dataset],
        "clip_geom",
        allow_none=True,
    )
    type_check(clip_layer_index, [int], "clip_layer_index")
    type_check(overwrite, [bool], "overwrite")
    type_check(epsilon, [float], "epsilon")
    type_check(verbose, [int], "verbose")

    in_rasters = to_raster_list(raster)

    if out_dir is not None and not os.path.isdir(out_dir):
        raise ValueError(f"Output directory does not exists: {out_dir}")

    if not rasters_are_aligned(in_rasters):
        raise ValueError(
            "Input rasters must be aligned. Please use the align function.")

    # Fixed for the whole run; the clip_geom parameter itself is never
    # reassigned below.
    use_clip = clip_geom is not None
    build_grid = generate_grid_geom is True or use_clip

    output_geom = None

    metadata = internal_raster_to_metadata(in_rasters[0])

    if verbose == 1:
        print("Generating blocks..")

    # internal offset array. Avoid manipulating the og array.
    if offsets is None:
        offsets = []

    # Normalise to tuples first so that [0, 0] and (0, 0) are treated alike.
    normalized_offsets = []
    for offset in offsets:
        if not isinstance(offset, (list, tuple)) or len(offset) != 2:
            raise ValueError(
                f"offset must be a list or tuple of two integers. Recieved: {offset}"
            )
        normalized_offsets.append((offset[0], offset[1]))

    in_offsets = []
    if generate_zero_offset and (0, 0) not in normalized_offsets:
        in_offsets.append((0, 0))
    in_offsets.extend(normalized_offsets)

    border_patches_needed_x = True
    border_patches_needed_y = True

    if use_clip:
        border_patches_needed_x = False
        border_patches_needed_y = False

    shapes = []
    for offset in in_offsets:
        block_shape = shape_to_blockshape(metadata["shape"], (size, size),
                                          offset)

        if block_shape[0] * size == metadata["width"]:
            border_patches_needed_x = False

        if block_shape[1] * size == metadata["height"]:
            border_patches_needed_y = False

        shapes.append(block_shape)

    if generate_border_patches and in_offsets:
        cut_x = (metadata["width"] - in_offsets[0][0]) - (shapes[0][0] * size)
        cut_y = (metadata["height"] - in_offsets[0][1]) - (shapes[0][1] * size)

        if border_patches_needed_x and cut_x > 0:
            shapes[0][0] += 1

        if border_patches_needed_y and cut_y > 0:
            shapes[0][1] += 1

    # calculate the offsets: number of tiles per offset and in total
    offset_rows = []
    for block_shape in shapes:
        rows = int(block_shape[0])
        for dim in block_shape[1:]:
            rows *= int(dim)
        offset_rows.append(rows)

    all_rows = sum(offset_rows)
    offset_rows_cumsum = np.cumsum(offset_rows)

    if build_grid:

        if verbose == 1:
            print("Calculating grid cells..")

        ulx, uly, _lrx, _lry = metadata["extent"]

        pixel_width = abs(metadata["pixel_width"])
        pixel_height = abs(metadata["pixel_height"])

        xres = pixel_width * size
        yres = pixel_height * size

        dx = xres / 2
        dy = yres / 2

        # Ready clip geom outside of loop.
        if use_clip:
            clip_ref = open_vector(
                internal_reproject_vector(clip_geom,
                                          metadata["projection_osr"]))
            clip_layer = clip_ref.GetLayerByIndex(clip_layer_index)

            meta_clip = internal_vector_to_metadata(clip_ref)
            clip_extent = meta_clip["extent_ogr"]

        coord_grid = np.empty((all_rows, 2), dtype="float64")

        row_count = 0
        grid_rows = []
        for idx, (x_offset, y_offset) in enumerate(in_offsets):
            x_step = shapes[idx][0]
            y_step = shapes[idx][1]

            x_min = (ulx + dx) + (x_offset * pixel_width)
            x_max = x_min + (x_step * xres)

            y_max = (uly - dy) - (y_offset * pixel_height)
            y_min = y_max - (y_step * yres)

            # y is flipped so: xmin --> xmax, ymax -- ymin to keep same order as numpy array
            x_patches = round((x_max - x_min) / xres)
            y_patches = round((y_max - y_min) / yres)

            xr = np.arange(x_min, x_max, xres)[0:x_step]
            if xr.shape[0] < x_patches:
                xr = np.arange(x_min, x_max + epsilon, xres)[0:x_step]
            elif xr.shape[0] > x_patches:
                xr = np.arange(x_min, x_max - epsilon, xres)[0:x_step]

            yr = np.arange(y_max, y_min + epsilon, -yres)[0:y_step]
            if yr.shape[0] < y_patches:
                yr = np.arange(y_max, y_min - epsilon, -yres)[0:y_step]
            elif yr.shape[0] > y_patches:
                yr = np.arange(y_max, y_min + epsilon, -yres)[0:y_step]

            if generate_border_patches and idx == 0:

                # Pull the extra border tile back inside the raster extent.
                if border_patches_needed_x:
                    xr[-1] = xr[-1] - (
                        (xr[-1] + dx) - metadata["extent_dict"]["right"])

                if border_patches_needed_y:
                    yr[-1] = yr[-1] - (
                        (yr[-1] - dy) - metadata["extent_dict"]["bottom"])

            oxx, oyy = np.meshgrid(xr, yr)
            oxr = oxx.ravel()
            oyr = oyy.ravel()

            offset_length = oxr.shape[0]

            coord_grid[row_count:row_count + offset_length, 0] = oxr
            coord_grid[row_count:row_count + offset_length, 1] = oyr

            row_count += offset_length

            grid_rows.append(offset_length)

        # Row boundaries of each offset within the extracted array.
        offset_rows_cumsum = np.cumsum(grid_rows)
        coord_grid = coord_grid[:row_count]

        # Output geometry
        driver = ogr.GetDriverByName("GPKG")
        patches_path = f"/vsimem/patches_{uuid4().int}.gpkg"
        patches_ds = driver.CreateDataSource(patches_path)
        patches_layer = patches_ds.CreateLayer("patches_all",
                                               geom_type=ogr.wkbPolygon,
                                               srs=metadata["projection_osr"])
        patches_fdefn = patches_layer.GetLayerDefn()

        og_fid = "og_fid"

        field_defn = ogr.FieldDefn(og_fid, ogr.OFTInteger)
        patches_layer.CreateField(field_defn)

        if use_clip:
            clip_feature_count = meta_clip["layers"][clip_layer_index][
                "feature_count"]
            spatial_index = rtree.index.Index(interleaved=False)
            for _ in range(clip_feature_count):
                clip_feature = clip_layer.GetNextFeature()
                clip_fid = clip_feature.GetFID()
                clip_feature_geom = clip_feature.GetGeometryRef()
                xmin, xmax, ymin, ymax = clip_feature_geom.GetEnvelope()

                spatial_index.insert(clip_fid, (xmin, xmax, ymin, ymax))

        fids = 0
        mask = []
        tile_count = coord_grid.shape[0]
        for tile_id in range(tile_count):
            x, y = coord_grid[tile_id]

            if verbose == 1:
                progress(tile_id, tile_count, "Patch generation")

            x_min = x - dx
            x_max = x + dx

            # Half the tile height, which differs from dx for non-square
            # pixels.
            y_min = y - dy
            y_max = y + dy

            tile_intersects = True

            if use_clip:
                tile_intersects = False

                if not ogr_bbox_intersects([x_min, x_max, y_min, y_max],
                                           clip_extent):
                    continue

                intersections = list(
                    spatial_index.intersection((x_min, x_max, y_min, y_max)))
                if len(intersections) == 0:
                    continue

            poly_wkt = f"POLYGON (({x_min} {y_max}, {x_max} {y_max}, {x_max} {y_min}, {x_min} {y_min}, {x_min} {y_max}))"
            grid_geom = ogr.CreateGeometryFromWkt(poly_wkt)

            if use_clip:
                for fid1 in intersections:
                    clip_feature = clip_layer.GetFeature(fid1)
                    candidate_geom = clip_feature.GetGeometryRef()

                    if grid_geom.Intersects(candidate_geom):
                        tile_intersects = True
                        break

            if tile_intersects:
                ft = ogr.Feature(patches_fdefn)
                ft.SetGeometry(grid_geom)

                ft.SetField(og_fid, int(fids))
                ft.SetFID(int(fids))

                patches_layer.CreateFeature(ft)
                ft = None

                mask.append(tile_id)
                fids += 1

        if verbose == 1:
            progress(tile_count, tile_count, "Patch generation")

        mask = np.array(mask, dtype=int)

        if generate_grid_geom is True:
            if out_dir is None:
                output_geom = patches_ds
            else:
                raster_basename = metadata["basename"]
                geom_name = f"{prefix}{raster_basename}_geom_{str(size)}{postfix}.gpkg"
                output_geom = os.path.join(out_dir, geom_name)

                overwrite_required(output_geom, overwrite)
                remove_if_overwrite(output_geom, overwrite)

                if verbose == 1:
                    print("Writing output geometry..")

                internal_vector_to_disk(patches_ds,
                                        output_geom,
                                        overwrite=overwrite)

    if verbose == 1:
        print("Writing numpy array..")

    output_blocks = []

    for in_raster in in_rasters:

        output_block = None

        if out_dir is not None:
            base = os.path.basename(in_raster)
            basename = os.path.splitext(base)[0]
            # Join directory and file name as two separate components.
            output_block = os.path.join(out_dir,
                                        f"{prefix}{basename}{postfix}.npy")

        raster_meta = internal_raster_to_metadata(in_raster)

        if build_grid:
            output_shape = (row_count, size, size, raster_meta["band_count"])
        else:
            output_shape = (all_rows, size, size, raster_meta["band_count"])

        output_array = np.empty(output_shape, dtype=raster_meta["datatype"])

        ref = raster_to_array(in_raster, filled=True)

        for k, offset in enumerate(in_offsets):
            start = 0
            if k > 0:
                start = offset_rows_cumsum[k - 1]

            if (k == 0 and generate_border_patches
                    and (border_patches_needed_x or border_patches_needed_y)):
                blocks = array_to_blocks(
                    ref,
                    (size, size),
                    offset,
                    border_patches_needed_x,
                    border_patches_needed_y,
                )
            else:
                blocks = array_to_blocks(ref, (size, size), offset)

            output_array[start:offset_rows_cumsum[k]] = blocks

        if build_grid:
            # Only keep the tiles that made it into the grid.
            output_array = output_array[mask]

        if out_dir is None:
            output_blocks.append(output_array)
        else:
            output_blocks.append(output_block)
            np.save(output_block, output_array)

    if verify_output and generate_grid_geom:
        test_extraction(
            in_rasters,
            output_blocks,
            output_geom,
            samples=verification_samples,
            grid_layer_index=0,
            verbose=verbose,
        )

    if len(output_blocks) == 1:
        output_blocks = output_blocks[0]

    return (output_blocks, output_geom)
def test_extraction(
    rasters: Union[list, str, gdal.Dataset],
    arrays: Union[list, np.ndarray],
    grid: Union[ogr.DataSource, str],
    samples: int = 1000,  # if 0, all
    grid_layer_index: int = 0,
    verbose: int = 1,
) -> bool:
    """Validates the output of the patch_extractor. Useful if you need peace of mind.
    Set samples to 0 to test everything.

    Args:
        rasters (list of rasters | path | raster): The raster(s) used.

        arrays (list of arrays | ndarray): The arrays generated.

        grid (vector | vector_path): The grid generated.

    **kwargs:
        samples (int): The amount of patches to randomly test. If 0 all
        patches will be tested. This is a long process, so consider only
        testing everything if absolutely necessary.

        grid_layer_index (int): If the grid is part of a multi-layer vector,
        specify the index of the grid.

        verbose (int): If 1 will output messages on progress.

    Returns:
        True if the extraction is valid. Raises an error otherwise.

    Raises:
        ValueError: If the grid layer or a numpy array path does not exist.
        Exception: If an array cannot be read, a grid cell is missing, the
        raster cannot be clipped or a patch differs from the raster.
    """
    type_check(rasters, [list, str, gdal.Dataset], "rasters")
    type_check(arrays, [list, str, np.ndarray], "arrays")
    type_check(grid, [list, str, ogr.DataSource], "grid")
    type_check(samples, [int], "samples")
    type_check(grid_layer_index, [int], "grid_layer_index")
    type_check(verbose, [int], "verbose")

    in_rasters = to_raster_list(rasters)
    in_arrays = arrays

    if verbose == 1:
        print("Verifying integrity of output grid..")

    grid_memory = open_vector(grid)
    grid_metadata = internal_vector_to_metadata(grid)
    grid_projection = grid_metadata["projection_osr"]

    if grid_layer_index > (grid_metadata["layer_count"] - 1):
        raise ValueError(
            f"Requested non-existing layer index: {grid_layer_index}")

    grid_layer = grid_memory.GetLayer(grid_layer_index)

    # Select sample fids
    feature_count = grid_metadata["layers"][grid_layer_index]["feature_count"]
    test_samples = samples if samples > 0 else feature_count
    # Never request more samples than there are features; an empty grid
    # simply yields no samples.
    max_test = min(test_samples, feature_count)
    test_fids = np.array(random.sample(range(0, feature_count), max_test),
                         dtype="uint64")

    # NOTE(review): a Shapefile driver is used although the temporary paths
    # below end in .gpkg - kept as is, confirm that this is intended.
    mem_driver = ogr.GetDriverByName("ESRI Shapefile")
    for index, raster in enumerate(in_rasters):
        test_rast = open_raster(raster)

        test_array = in_arrays[index]
        if isinstance(test_array, str):
            if not os.path.exists(test_array):
                raise ValueError(f"Numpy array does not exist: {test_array}")

            try:
                test_array = np.load(in_arrays[index])
            except Exception as error:
                raise Exception(
                    f"Attempted to read numpy raster from: {in_arrays[index]}"
                ) from error

        base = os.path.basename(raster)
        basename = os.path.splitext(base)[0]

        if verbose == 1:
            print(f"Testing: {basename}")

        total = len(test_fids)
        for tested, test in enumerate(test_fids):
            # Plain Python int for both the OGR lookup and the array index.
            fid = int(test)

            feature = grid_layer.GetFeature(fid)

            if feature is None:
                raise Exception(f"Feature not found: {test}")

            test_ds_path = f"/vsimem/test_mem_grid_{uuid4().int}.gpkg"
            test_ds = mem_driver.CreateDataSource(test_ds_path)
            test_ds_lyr = test_ds.CreateLayer("test_mem_grid_layer",
                                              geom_type=ogr.wkbPolygon,
                                              srs=grid_projection)
            test_ds_lyr.CreateFeature(feature.Clone())
            test_ds.SyncToDisk()

            clipped = internal_clip_raster(
                test_rast,
                test_ds_path,
                adjust_bbox=False,
                crop_to_geom=True,
                all_touch=False,
            )

            if clipped is None:
                raise Exception(
                    "Error while clipping raster. Likely a bad extraction.")

            ref_image = raster_to_array(clipped, filled=True)
            image_block = test_array[fid]
            if not np.array_equal(ref_image, image_block):
                raise Exception(
                    f"Image {basename} and grid cell did not match..")

            if verbose == 1:
                progress(tested + 1, total, "Verifying..")

    return True
def rasterize_vector(
    vector,
    pixel_size,
    out_path=None,
    extent=None,
    all_touch=False,
    dtype="uint8",
    optim="raster",
    band=1,
    fill_value=0,
    nodata_value=None,
    check_memory=True,
    burn_value=1,
    attribute=None,
):
    """Burns the first layer of a vector into a new single-band GeoTIFF.

    Args:
        vector: The vector to rasterize; passed to open_vector and
        internal_vector_to_metadata.

        pixel_size: Pixel size used for both the x and y resolution.

    **kwargs:
        out_path: Destination path. If None, a uniquely named
        "/vsimem/<uuid>.tif" is created.

        extent: Optional reference whose metadata "extent_dict"
        (left/right/top/bottom) defines the raster size and origin instead of
        the layer extent.

        all_touch: If truthy, rasterize with ALL_TOUCHED=TRUE.

        dtype: Output datatype, translated with numpy_to_gdal_datatype2.

        optim: "raster" or "vector"; any other value selects OPTIM=AUTO.

        band: Currently unused - band 1 is always written.

        fill_value: Initial value of the band. Only applied when nodata_value
        is None.

        nodata_value: If given, set as the nodata value of the band (the band
        is then not filled with fill_value).

        check_memory: If False, GDAL's CHECK_DISK_FREE_SPACE check is disabled
        while the raster is created.

        burn_value: Value burned when no attribute is given.

        attribute: Name of a field whose values are burned instead of
        burn_value.

    Returns:
        The path of the created raster.

    Raises:
        Exception: If the destination raster could not be created.
    """
    vector_fn = vector
    raster_fn = f"/vsimem/{str(uuid4())}.tif" if out_path is None else out_path

    # Open the data source and read in the extent
    source_ds = open_vector(vector_fn)
    source_meta = internal_vector_to_metadata(vector_fn)
    source_layer = source_ds.GetLayer()
    x_min, x_max, y_min, y_max = source_layer.GetExtent()

    # Create the destination data source
    x_res = int((x_max - x_min) / pixel_size)
    y_res = int((y_max - y_min) / pixel_size)

    if extent is not None:
        extent_vector = internal_vector_to_metadata(extent)
        extent_dict = extent_vector["extent_dict"]

        x_res = int((extent_dict["right"] - extent_dict["left"]) / pixel_size)
        y_res = int((extent_dict["top"] - extent_dict["bottom"]) / pixel_size)
        x_min = extent_dict["left"]
        y_max = extent_dict["top"]

    # Only touch the global config option when asked to, and put back whatever
    # the caller had configured instead of forcing it to "TRUE".
    disable_space_check = check_memory is False
    previous_space_check = gdal.GetConfigOption("CHECK_DISK_FREE_SPACE")
    if disable_space_check:
        gdal.SetConfigOption("CHECK_DISK_FREE_SPACE", "FALSE")

    try:
        target_ds = gdal.GetDriverByName("GTiff").Create(
            raster_fn,
            x_res,
            y_res,
            1,
            numpy_to_gdal_datatype2(dtype),
        )
    finally:
        if disable_space_check:
            gdal.SetConfigOption("CHECK_DISK_FREE_SPACE", previous_space_check)

    if target_ds is None:
        raise Exception("Unable to rasterize.")

    target_ds.SetGeoTransform((x_min, pixel_size, 0, y_max, 0, -pixel_size))
    target_ds.SetProjection(source_meta["projection"])

    # Separate local name so the `band` parameter is not silently overwritten.
    target_band = target_ds.GetRasterBand(1)

    if nodata_value is not None:
        target_band.SetNoDataValue(nodata_value)
    else:
        target_band.Fill(fill_value)

    options = ["ALL_TOUCHED=TRUE" if all_touch else "ALL_TOUCHED=FALSE"]

    if optim == "raster":
        options.append("OPTIM=RASTER")
    elif optim == "vector":
        options.append("OPTIM=VECTOR")
    else:
        options.append("OPTIM=AUTO")

    if attribute is None:
        gdal.RasterizeLayer(
            target_ds,
            [1],
            source_layer,
            burn_values=[burn_value],
            options=options,
        )
    else:
        options.append(f"ATTRIBUTE={attribute}")
        # NOTE(review): default_options is used for raster creation options
        # elsewhere in this file; confirm it is meant to wrap rasterize options.
        gdal.RasterizeLayer(target_ds, [1], source_layer,
                            options=default_options(options))

    return raster_fn
def internal_dissolve_vector(
    vector: Union[str, ogr.DataSource],
    attribute: Optional[str] = None,
    out_path: Optional[str] = None,
    overwrite: bool = True,
    add_index: bool = True,
    process_layer: int = -1,
) -> str:
    """Dissolves the geometries of a vector, optionally grouped by an attribute.

    Each processed layer is unioned with ST_Union through the SQLITE dialect
    and copied to the output under its original layer name.

    Args:
        vector (path | vector): The vector to dissolve.

    **kwargs:
        attribute (str | None): Field to group by. If None, all geometries of
        a layer are unioned together.

        out_path (str | None): Destination path, resolved by ready_io_vector.

        overwrite (bool): Passed to ready_io_vector and remove_if_overwrite.

        add_index (bool): If True, vector_add_index is run on the output.

        process_layer (int): Index of the layer to dissolve. -1 processes all
        layers.

    Returns:
        The path of the dissolved vector.

    Raises:
        ValueError: If attribute is not a field of a processed layer.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(attribute, [str], "attribute", allow_none=True)
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(overwrite, [bool], "overwrite")
    type_check(add_index, [bool], "add_index")
    type_check(process_layer, [int], "process_layer")

    vector_list, path_list = ready_io_vector(vector, out_path, overwrite=overwrite)
    out_name = path_list[0]
    out_format = path_to_driver_vector(out_name)

    driver = ogr.GetDriverByName(out_format)

    ref = open_vector(vector_list[0])
    metadata = internal_vector_to_metadata(ref)

    Layer_info = TypedDict(
        "Layer_info",
        {
            "name": str,
            "geom": str,
            "fields": List[str]
        },
    )

    if process_layer == -1:
        selected_indices = range(len(metadata["layers"]))
    else:
        selected_indices = [process_layer]

    layers: List[Layer_info] = [
        {
            "name": metadata["layers"][index]["layer_name"],
            "geom": metadata["layers"][index]["column_geom"],
            "fields": metadata["layers"][index]["field_names"],
        }
        for index in selected_indices
    ]

    remove_if_overwrite(out_name, overwrite)

    destination: ogr.DataSource = driver.CreateDataSource(out_name)

    # Iterate the selected layers only. Indexing `layers` by the position in
    # metadata["layers"] overruns the list when a single layer was requested.
    for layer in layers:
        # Check if attribute table is valid
        if attribute is not None and attribute not in layer["fields"]:
            layer_fields = layer["fields"]
            raise ValueError(
                f"Invalid attribute for layer. Layers has the following fields: {layer_fields}"
            )

        geom_col = layer["geom"]
        name = layer["name"]

        if attribute is None:
            sql = f"SELECT ST_Union({geom_col}) AS geom FROM {name};"
        else:
            sql = f"SELECT {attribute}, ST_Union({geom_col}) AS geom FROM {name} GROUP BY {attribute};"

        result = ref.ExecuteSQL(sql, dialect="SQLITE")
        destination.CopyLayer(result, name, ["OVERWRITE=YES"])

    if add_index:
        vector_add_index(destination)

    destination.FlushCache()

    return out_name
def internal_multipart_to_singlepart(
    vector: Union[str, ogr.DataSource],
    out_path: Optional[str] = None,
    copy_attributes: bool = False,
    overwrite: bool = True,
    add_index: bool = True,
    process_layer: int = -1,
    verbose: int = 1,
) -> str:
    """Splits multipart geometries into one feature per part.

    Layers whose geometry type is not a known multi type are copied feature
    by feature without splitting.

    Args:
        vector (path | vector): The vector to split.

    **kwargs:
        out_path (str | None): Destination path, resolved by ready_io_vector.

        copy_attributes (bool): If True, the fields of the source layer are
        recreated and each part receives the attribute values of its source
        feature.

        overwrite (bool): Passed to ready_io_vector and remove_if_overwrite.

        add_index (bool): If True, vector_add_index is run on the output.

        process_layer (int): Index of the layer to split. -1 processes all
        layers.

        verbose (int): If 1, prints progress messages.

    Returns:
        The path of the singlepart vector.
    """
    type_check(vector, [str, ogr.DataSource], "vector")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(copy_attributes, [bool], "copy_attributes")
    type_check(overwrite, [bool], "overwrite")
    type_check(add_index, [bool], "add_index")
    type_check(process_layer, [int], "process_layer")
    type_check(verbose, [int], "verbose")

    vector_list, path_list = ready_io_vector(vector, out_path, overwrite=overwrite)

    ref = open_vector(vector_list[0])
    out_name = path_list[0]

    driver = ogr.GetDriverByName(path_to_driver_vector(out_name))

    metadata = internal_vector_to_metadata(ref)

    remove_if_overwrite(out_name, overwrite)

    destination = driver.CreateDataSource(out_name)

    # OGR geometry type code of each multi type -> its singlepart type.
    multi_to_single = {
        4: 1,  # MultiPoint -> Point
        5: 2,  # MultiLineString -> LineString
        6: 3,  # MultiPolygon -> Polygon
        1004: 1001,  # MultiPoint (z) -> Point (z)
        1005: 1002,  # MultiLineString (z) -> LineString (z)
        1006: 1003,  # MultiPolygon (z) -> Polygon (z)
        2004: 2001,  # MultiPoint (m) -> Point (m)
        2005: 2002,  # MultiLineString (m) -> LineString (m)
        2006: 2003,  # MultiPolygon (m) -> Polygon (m)
        3004: 3001,  # MultiPoint (zm) -> Point (zm)
        3005: 3002,  # MultiLineString (zm) -> LineString (zm)
        3006: 3003,  # MultiPolygon (zm) -> Polygon (zm)
    }

    for index, layer_meta in enumerate(metadata["layers"]):
        if process_layer != -1 and index != process_layer:
            continue

        if verbose == 1:
            layer_name = layer_meta["layer_name"]
            print(f"Splitting layer: {layer_name}")

        source_type = layer_meta["geom_type_ogr"]
        target_unknown = source_type not in multi_to_single
        target_type = multi_to_single.get(source_type, source_type)

        destination_layer = destination.CreateLayer(
            layer_meta["layer_name"], layer_meta["projection_osr"], target_type
        )
        layer_defn = destination_layer.GetLayerDefn()
        field_count = layer_meta["field_count"]

        original_target = ref.GetLayerByIndex(index)
        feature_count = original_target.GetFeatureCount()

        if copy_attributes:
            if verbose == 1:
                print("Creating attribute fields")

            # Read the schema from the layer definition rather than the first
            # feature, so an empty layer does not crash here.
            source_defn = original_target.GetLayerDefn()

            for field_id in range(field_count):
                field_defn = source_defn.GetFieldDefn(field_id)

                fname = field_defn.GetName()
                ftype = field_defn.GetTypeName()
                fwidth = field_defn.GetWidth()
                fprecision = field_defn.GetPrecision()

                if ftype == "String" or ftype == "Date":
                    fielddefn = ogr.FieldDefn(fname, ogr.OFTString)
                    fielddefn.SetWidth(fwidth)
                elif ftype == "Real":
                    fielddefn = ogr.FieldDefn(fname, ogr.OFTReal)
                    fielddefn.SetWidth(fwidth)
                    fielddefn.SetPrecision(fprecision)
                else:
                    fielddefn = ogr.FieldDefn(fname, ogr.OFTInteger)

                destination_layer.CreateField(fielddefn)

        for feature_index in range(feature_count):
            feature = original_target.GetNextFeature()
            geom = feature.GetGeometryRef()

            # Unknown types are written whole. Splitting them as well would
            # also emit their components (e.g. polygon rings) as features.
            parts = [geom] if target_unknown else geom

            for geom_part in parts:
                out_feat = ogr.Feature(layer_defn)
                out_feat.SetGeometry(geom_part)

                if copy_attributes:
                    for field_id in range(field_count):
                        values = feature.GetField(field_id)
                        out_feat.SetField(field_id, values)

                destination_layer.CreateFeature(out_feat)

            if verbose == 1:
                progress(feature_index, feature_count - 1, "Splitting.")

    if add_index:
        vector_add_index(destination)

    destination.FlushCache()

    return out_name
def _clip_raster(
    raster: Union[str, gdal.Dataset],
    clip_geom: Union[str, ogr.DataSource, gdal.Dataset],
    out_path: Optional[str] = None,
    resample_alg: str = "nearest",
    crop_to_geom: bool = True,
    adjust_bbox: bool = True,
    all_touch: bool = True,
    overwrite: bool = True,
    creation_options: list = [],
    dst_nodata: Union[str, int, float] = "infer",
    layer_to_clip: int = 0,
    prefix: str = "",
    postfix: str = "_clipped",
    verbose: int = 1,
    uuid: bool = False,
    ram: int = 8000,
) -> str:
    """OBS: Internal. Single output.

    Clips a raster(s) using a vector geometry or the extents of
    a raster.

    Args:
        raster (path | raster): The raster to clip.

        clip_geom (path | vector | raster): The geometry to clip with. For a
        raster, its extent is used.

    **kwargs:
        out_path (str | None): Destination path, resolved by ready_io_raster.

        resample_alg (str): Resampling method, translated with
        translate_resample_method.

        crop_to_geom (bool): If True, the output bounds follow the clip
        geometry instead of the source raster.

        adjust_bbox (bool): If True (and crop_to_geom), the bounds are aligned
        to the source raster with align_bbox.

        all_touch (bool): Sets the CUTLINE_ALL_TOUCHED warp option.

        overwrite (bool): Passed to ready_io_raster and remove_if_overwrite.

        creation_options (list): Passed through default_options.

        dst_nodata (str | int | float): Output nodata when the source raster
        has none. "infer" derives it from the raster datatype.

        layer_to_clip (int): Layer index of a multi-layer clip vector.

        prefix (str), postfix (str), uuid (bool): Passed to ready_io_raster.

        verbose (int): If 0, GDAL messages are silenced during the warp.

        ram (int): Passed to gdal.Warp as warpMemoryLimit.

    Returns:
        The path of the clipped raster.

    Raises:
        ValueError: On an unusable out_path, clip geometry, layer index or
        nodata value.
        Exception: If the inputs do not intersect or the warp fails.
    """
    type_check(raster, [str, gdal.Dataset], "raster")
    type_check(clip_geom, [str, ogr.DataSource, gdal.Dataset], "clip_geom")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(resample_alg, [str], "resample_alg")
    type_check(crop_to_geom, [bool], "crop_to_geom")
    type_check(adjust_bbox, [bool], "adjust_bbox")
    type_check(all_touch, [bool], "all_touch")
    type_check(dst_nodata, [str, int, float], "dst_nodata")
    type_check(layer_to_clip, [int], "layer_to_clip")
    type_check(overwrite, [bool], "overwrite")
    type_check(creation_options, [list], "creation_options")
    type_check(prefix, [str], "prefix")
    type_check(postfix, [str], "postfix")
    type_check(verbose, [int], "verbose")
    type_check(uuid, [bool], "uuid")
    type_check(ram, [int], "ram")

    _, path_list = ready_io_raster(raster,
                                   out_path,
                                   overwrite=overwrite,
                                   prefix=prefix,
                                   postfix=postfix,
                                   uuid=uuid)

    if out_path is not None:
        if "vsimem" not in out_path:
            if not os.path.isdir(os.path.split(os.path.normpath(out_path))[0]):
                raise ValueError(
                    f"out_path folder does not exists: {out_path}")

    # Input is a vector.
    if is_vector(clip_geom):
        clip_ds = open_vector(clip_geom)

        clip_metadata = internal_vector_to_metadata(
            clip_ds, process_layer=layer_to_clip)

        if clip_metadata["layer_count"] > 1:
            clip_ds = internal_vector_to_memory(clip_ds,
                                                layer_to_extract=layer_to_clip)

        # gdal.Warp is given the cutline by name, not as an object.
        if isinstance(clip_ds, ogr.DataSource):
            clip_ds = clip_ds.GetName()

    # Input is a raster (use extent)
    elif is_raster(clip_geom):
        clip_metadata = raster_to_metadata(clip_geom, create_geometry=True)
        clip_metadata["layer_count"] = 1
        clip_ds = clip_metadata["extent_datasource"].GetName()
    else:
        if file_exists(clip_geom):
            raise ValueError(f"Unable to parse clip geometry: {clip_geom}")
        else:
            raise ValueError(f"Unable to locate clip geometry {clip_geom}")

    if layer_to_clip > (clip_metadata["layer_count"] - 1):
        raise ValueError("Requested an unable layer_to_clip.")

    if clip_ds is None:
        raise ValueError(f"Unable to parse input clip geom: {clip_geom}")

    clip_projection = clip_metadata["projection_osr"]

    # options
    warp_options = [
        "CUTLINE_ALL_TOUCHED=TRUE" if all_touch else "CUTLINE_ALL_TOUCHED=FALSE"
    ]

    origin_layer = open_raster(raster)

    raster_metadata = raster_to_metadata(raster)
    origin_projection = raster_metadata["projection_osr"]
    origin_extent = raster_metadata["extent"]

    # Check if projections match, otherwise reproject target geom.
    clip_reprojected = not origin_projection.IsSame(clip_projection)
    if clip_reprojected:
        clip_metadata["extent"] = reproject_extent(
            clip_metadata["extent"],
            clip_projection,
            origin_projection,
        )

    # Read the extent after the reprojection so the overlap test compares
    # both extents in the raster's projection.
    clip_extent = clip_metadata["extent"]

    # Fast check: Does the extent of the two inputs overlap?
    if not gdal_bbox_intersects(origin_extent, clip_extent):
        raise Exception("Geometries did not intersect.")

    output_bounds = raster_metadata["extent_gdal_warp"]

    if crop_to_geom:
        if adjust_bbox:
            output_bounds = align_bbox(
                raster_metadata["extent"],
                clip_metadata["extent"],
                raster_metadata["pixel_width"],
                raster_metadata["pixel_height"],
                warp_format=True,
            )
        elif clip_reprojected:
            # "extent_gdal_warp" still holds the bounds in the clip geometry's
            # own projection, so derive the warp bounds from the reprojected
            # extent. NOTE(review): assumes "extent" is ordered
            # (x_min, y_max, x_max, y_min), as _warp_raster unpacks it - confirm.
            x_min, y_max, x_max, y_min = clip_metadata["extent"]
            output_bounds = (x_min, y_min, x_max, y_max)
        else:
            output_bounds = clip_metadata["extent_gdal_warp"]

    # formats
    out_name = path_list[0]
    out_format = path_to_driver_raster(out_name)
    # Pass a copy so the shared default list can never be modified.
    out_creation_options = default_options(list(creation_options))

    # nodata
    src_nodata = raster_metadata["nodata_value"]
    out_nodata = None
    if src_nodata is not None:
        out_nodata = src_nodata
    else:
        if dst_nodata == "infer":
            out_nodata = gdal_nodata_value_from_type(
                raster_metadata["datatype_gdal_raw"])
        elif dst_nodata is None:
            out_nodata = None
        elif isinstance(dst_nodata, (int, float)):
            out_nodata = dst_nodata
        else:
            raise ValueError(f"Unable to parse nodata_value: {dst_nodata}")

    # Removes file if it exists and overwrite is True. Uses the resolved
    # output name; out_path may be None.
    remove_if_overwrite(out_name, overwrite)

    if verbose == 0:
        gdal.PushErrorHandler("CPLQuietErrorHandler")

    try:
        clipped = gdal.Warp(
            out_name,
            origin_layer,
            format=out_format,
            resampleAlg=translate_resample_method(resample_alg),
            targetAlignedPixels=False,
            outputBounds=output_bounds,
            xRes=raster_metadata["pixel_width"],
            yRes=raster_metadata["pixel_height"],
            cutlineDSName=clip_ds,
            # GDAL does this incorrectly when targetAlignedPixels is True.
            cropToCutline=False,
            creationOptions=out_creation_options,
            warpMemoryLimit=ram,
            warpOptions=warp_options,
            srcNodata=raster_metadata["nodata_value"],
            dstNodata=out_nodata,
            multithread=True,
        )
    finally:
        # Always restore the handler stack, even if the warp raises.
        if verbose == 0:
            gdal.PopErrorHandler()

    if clipped is None:
        raise Exception("Error while clipping raster.")

    return out_name
def internal_intersect_vector(
    vector: Union[str, ogr.DataSource],
    clip_geom: Union[str, ogr.DataSource, gdal.Dataset],
    out_path: Optional[str] = None,
    to_extent: bool = False,
    process_layer: int = 0,
    process_layer_clip: int = 0,
    add_index: bool = True,
    preserve_fid: bool = True,
    overwrite: bool = True,
    return_bool: bool = False,
) -> Union[str, bool]:
    """Selects the features of a vector that intersect a geometry.

    The clip geometry is reprojected to match the vector, both are merged
    into one datasource and an ST_INTERSECTS query (SQLITE dialect) selects
    the matching features of the vector layer.

    Args:
        vector (path | vector): The vector to select features from.

        clip_geom (path | vector | raster): The geometry to intersect with.

    **kwargs:
        out_path (str | None): Destination path, resolved by ready_io_vector.

        to_extent (bool): Type-checked but otherwise unused by this function.

        process_layer (int): Layer index of the vector.

        process_layer_clip (int): Layer index of the clip geometry.

        add_index (bool): If True, vector_add_index is run on the merged
        datasource before the query.

        preserve_fid (bool): Type-checked but otherwise unused by this
        function.

        overwrite (bool): Passed to ready_io_vector.

        return_bool (bool): If True, nothing is written and only whether any
        feature intersects is returned.

    Returns:
        The path of the output vector, or a bool when return_bool is True.

    Raises:
        Exception: If the query fails or the output cannot be created.
    """
    type_check(vector, [ogr.DataSource, str, list], "vector")
    type_check(clip_geom, [ogr.DataSource, gdal.Dataset, str, list, tuple],
               "clip_geom")
    type_check(out_path, [str], "out_path", allow_none=True)
    type_check(to_extent, [bool], "to_extent")
    type_check(process_layer, [int], "process_layer")
    type_check(process_layer_clip, [int], "process_layer_clip")
    type_check(add_index, [bool], "add_index")
    type_check(preserve_fid, [bool], "preserve_fid")
    type_check(overwrite, [bool], "overwrite")
    type_check(return_bool, [bool], "return_bool")

    _vector_list, path_list = ready_io_vector(vector,
                                              out_path,
                                              overwrite=overwrite)
    out_name = path_list[0]

    match_projection = internal_reproject_vector(clip_geom, vector)
    geometry_to_clip = open_vector(match_projection)

    merged = open_vector(merge_vectors([vector, match_projection]))

    if add_index:
        vector_add_index(merged)

    vector_metadata = internal_vector_to_metadata(vector)
    vector_layername = vector_metadata["layers"][process_layer]["layer_name"]
    vector_geom_col = vector_metadata["layers"][process_layer]["column_geom"]

    clip_geom_metadata = internal_vector_to_metadata(geometry_to_clip)
    clip_geom_layername = clip_geom_metadata["layers"][process_layer_clip]["layer_name"]
    clip_geom_col = clip_geom_metadata["layers"][process_layer_clip]["column_geom"]

    # The same query serves both modes; return_bool only inspects the count.
    sql = f"SELECT A.* FROM '{vector_layername}' A, '{clip_geom_layername}' B WHERE ST_INTERSECTS(A.{vector_geom_col}, B.{clip_geom_col});"

    result = merged.ExecuteSQL(sql, dialect="SQLITE")

    if result is None:
        raise Exception("Error while running intersect.")

    if return_bool:
        intersects = result.GetFeatureCount() > 0
        merged.ReleaseResultSet(result)
        return intersects

    driver = ogr.GetDriverByName(path_to_driver_vector(out_name))
    destination: ogr.DataSource = driver.CreateDataSource(out_name)

    # Validate before use: a failed CreateDataSource returns None.
    if destination is None:
        merged.ReleaseResultSet(result)
        raise Exception("Error while running intersect.")

    destination.CopyLayer(result, vector_layername, ["OVERWRITE=YES"])
    merged.ReleaseResultSet(result)

    destination.FlushCache()

    return out_name
def _warp_raster(
    raster: Union[str, gdal.Dataset],
    out_path: Optional[str] = None,
    projection: Optional[Union[int, str, gdal.Dataset, ogr.DataSource,
                               osr.SpatialReference]] = None,
    clip_geom: Optional[Union[str, ogr.DataSource]] = None,
    target_size: Optional[Union[Tuple[Number], Number]] = None,
    target_in_pixels: bool = False,
    resample_alg: str = "nearest",
    crop_to_geom: bool = True,
    all_touch: bool = True,
    adjust_bbox: bool = True,
    overwrite: bool = True,
    creation_options: Union[list, None] = None,
    src_nodata: Union[str, int, float] = "infer",
    dst_nodata: Union[str, int, float] = "infer",
    layer_to_clip: int = 0,
    prefix: str = "",
    postfix: str = "_resampled",
) -> str:
    """WARNING: INTERNAL. DO NOT USE.

    Reprojects, clips and resamples a raster in a single gdal.Warp call.

    Args:
        raster (path | raster): The raster to warp.

    **kwargs:
        out_path (str | None): Destination path, resolved by ready_io_raster.

        projection: Target projection, parsed with parse_projection. If None,
        the projection of the raster is kept.

        clip_geom (path | vector | raster | None): Optional cutline. For a
        raster, its extent is used. If None, no cutline or output bounds are
        passed to GDAL.

        target_size, target_in_pixels: Passed to raster_size_from_list to get
        the target resolution or pixel dimensions.

        resample_alg (str): Resampling method, translated with
        translate_resample_method.

        crop_to_geom (bool): If True, the output bounds follow the clip
        geometry.

        all_touch (bool): Sets the CUTLINE_ALL_TOUCHED warp option.

        adjust_bbox (bool): If True (and crop_to_geom), the bounds are aligned
        with align_bbox.

        overwrite (bool): Passed to ready_io_raster and remove_if_overwrite.

        creation_options (list | None): Passed through default_options.

        src_nodata (str | int | float): Source nodata. "infer" uses the nodata
        value of the raster.

        dst_nodata (str | int | float): Output nodata when the raster has
        none. "infer" derives it from the raster datatype.

        layer_to_clip (int): Layer index of a multi-layer clip vector.

        prefix (str), postfix (str): Passed to ready_io_raster.

    Returns:
        The path of the warped raster.

    Raises:
        ValueError: On an unusable clip geometry or layer index.
        Exception: If the clip geometry does not intersect the raster or the
        warp fails.
    """
    raster_list, path_list = ready_io_raster(raster, out_path, overwrite,
                                             prefix, postfix)

    origin = open_raster(raster_list[0])
    out_name = path_list[0]
    raster_metadata = raster_to_metadata(origin, create_geometry=True)

    # options
    warp_options = [
        "CUTLINE_ALL_TOUCHED=TRUE" if all_touch else "CUTLINE_ALL_TOUCHED=FALSE"
    ]

    origin_projection: osr.SpatialReference = raster_metadata["projection_osr"]
    origin_extent: ogr.Geometry = raster_metadata["extent_geom_latlng"]

    target_projection = origin_projection
    if projection is not None:
        target_projection = parse_projection(projection)

    # Defaults for the no-clip case. Without them gdal.Warp below is reached
    # with undefined names when clip_geom is None.
    clip_ds = None
    output_bounds = None

    if clip_geom is not None:
        if is_raster(clip_geom):
            opened_raster = open_raster(clip_geom)
            clip_metadata_raster = raster_to_metadata(opened_raster,
                                                      create_geometry=True)
            clip_ds = clip_metadata_raster["extent_datasource"]
            clip_metadata = internal_vector_to_metadata(clip_ds,
                                                        create_geometry=True)
        elif is_vector(clip_geom):
            clip_ds = open_vector(clip_geom)
            clip_metadata = internal_vector_to_metadata(clip_ds,
                                                        create_geometry=True)
        else:
            if file_exists(clip_geom):
                raise ValueError(f"Unable to parse clip geometry: {clip_geom}")
            else:
                raise ValueError(f"Unable to find clip geometry {clip_geom}")

        if layer_to_clip > (clip_metadata["layer_count"] - 1):
            raise ValueError("Requested an unable layer_to_clip.")

        clip_projection = clip_metadata["projection_osr"]
        clip_extent = clip_metadata["extent_geom_latlng"]

        # Fast check: Does the extent of the two inputs overlap?
        if not origin_extent.Intersects(clip_extent):
            raise Exception("Clipping geometry did not intersect raster.")

        # Check if projections match, otherwise reproject target geom.
        if not target_projection.IsSame(clip_projection):
            clip_metadata["extent"] = reproject_extent(
                clip_metadata["extent"],
                clip_projection,
                target_projection,
            )

        # The extent needs to be reprojected to the target.
        # this ensures that adjust_bbox works.
        x_min_og, y_max_og, x_max_og, y_min_og = reproject_extent(
            raster_metadata["extent"],
            origin_projection,
            target_projection,
        )
        output_bounds = (x_min_og, y_min_og, x_max_og, y_max_og
                         )  # gdal_warp format

        if crop_to_geom:
            if adjust_bbox:
                # NOTE(review): the raster extent passed here is the
                # un-reprojected one - confirm this is what align_bbox expects.
                output_bounds = align_bbox(
                    raster_metadata["extent"],
                    clip_metadata["extent"],
                    raster_metadata["pixel_width"],
                    raster_metadata["pixel_height"],
                    warp_format=True,
                )
            else:
                x_min_og, y_max_og, x_max_og, y_min_og = clip_metadata[
                    "extent"]
                output_bounds = (
                    x_min_og,
                    y_min_og,
                    x_max_og,
                    y_max_og,
                )  # gdal_warp format

        if clip_metadata["layer_count"] > 1:
            clip_ds = vector_to_memory(
                clip_ds,
                memory_path=f"clip_geom_{uuid4().int}.gpkg",
                layer_to_extract=layer_to_clip,
            )
        elif not isinstance(clip_ds, str):
            clip_ds = vector_to_memory(
                clip_ds,
                memory_path=f"clip_geom_{uuid4().int}.gpkg",
            )

        if clip_ds is None:
            raise ValueError(f"Unable to parse input clip geom: {clip_geom}")

    x_res, y_res, x_pixels, y_pixels = raster_size_from_list(
        target_size, target_in_pixels)

    out_format = path_to_driver_raster(out_name)
    out_creation_options = default_options(creation_options)

    # nodata
    # "infer" is a sentinel of this function, not a value GDAL understands:
    # resolve it to the raster's own nodata before calling gdal.Warp.
    raster_nodata = raster_metadata["nodata_value"]
    source_nodata = raster_nodata if src_nodata == "infer" else src_nodata

    # Same precedence as _clip_raster: the raster's nodata wins, otherwise
    # infer from the datatype or use the explicit dst_nodata.
    if raster_nodata is not None:
        out_nodata = raster_nodata
    elif dst_nodata == "infer":
        out_nodata = gdal_nodata_value_from_type(
            raster_metadata["datatype_gdal_raw"])
    else:
        out_nodata = dst_nodata

    # Removes file if it exists and overwrite is True. Uses the resolved
    # output name; out_path may be None.
    remove_if_overwrite(out_name, overwrite)

    warped = gdal.Warp(
        out_name,
        origin,
        xRes=x_res,
        yRes=y_res,
        width=x_pixels,
        height=y_pixels,
        cutlineDSName=clip_ds,
        outputBounds=output_bounds,
        format=out_format,
        srcSRS=origin_projection,
        dstSRS=target_projection,
        resampleAlg=translate_resample_method(resample_alg),
        creationOptions=out_creation_options,
        warpOptions=warp_options,
        srcNodata=source_nodata,
        dstNodata=out_nodata,
        targetAlignedPixels=False,
        cropToCutline=False,
        multithread=True,
    )

    if warped is None:
        raise Exception(f"Error while warping raster: {raster}")

    return out_name
def zonal_statistics(
    in_vector,
    output_vector=None,
    in_rasters=[],
    prefixes=[],
    stats=["mean", "med", "std"],
):
    """Computes per-feature raster statistics and stores them as vector fields.

    For every raster, every feature of the first vector layer is rasterized
    (once, while the first raster is processed), used as a mask on the
    overlapping raster window, and the requested statistics are written to
    fields named "<prefix><stat>".

    Args:
        in_vector: The vector holding the zones.

    **kwargs:
        output_vector: If None, in_vector is opened writeable and updated in
        place. NOTE(review): when a path is given and no reprojection is
        needed, the statistics are written to an in-memory copy and nothing
        visible here saves them to output_vector - confirm.

        in_rasters (list): The rasters to summarise. Must not be empty.

        prefixes (list): One field-name prefix per raster. May be empty when a
        single raster is given, in which case the fields are named after the
        statistic alone.

        stats (list): Names of the statistics, translated with stats_to_ints.

    Returns:
        The opened vector when output_vector is None, otherwise output_vector.

    Raises:
        ValueError: If the rasters, prefixes or stats cannot be used.
        Exception: If vector and raster do not overlap or a feature has no
        geometry.
    """
    if len(prefixes) != 0:
        if len(in_rasters) != len(prefixes):
            raise ValueError("Unable to parse prefixes.")

    if isinstance(in_rasters, list):
        if len(in_rasters) == 0:
            raise ValueError("List of rasters (in_rasters) is empty.")

    if len(stats) == 0:
        raise ValueError("Unable to parse statistics (stats).")

    # An empty prefix list passes the check above but every later
    # prefixes[i] lookup would fail. Rebinding (not mutating) keeps the
    # shared default list untouched.
    if len(prefixes) == 0:
        if len(in_rasters) > 1:
            raise ValueError(
                "Unable to parse prefixes. One prefix per raster is required "
                "when several rasters are given."
            )
        prefixes = [""]

    # Translate stats to integers
    stats_translated = stats_to_ints(stats)

    # Read the raster meta:
    raster_metadata = internal_raster_to_metadata(in_rasters[0])

    vector = None
    if output_vector is None:
        vector = open_vector(in_vector, writeable=True)
    else:
        vector = internal_vector_to_memory(in_vector)

    vector_metadata = internal_vector_to_metadata(vector)
    vector_layer = vector.GetLayer()

    # Check that projections match
    if not vector_metadata["projection_osr"].IsSame(
            raster_metadata["projection_osr"]):
        if output_vector is None:
            # Open the reprojected result so .GetLayer() below works, as the
            # other branch already does.
            vector = open_vector(
                internal_reproject_vector(in_vector, in_rasters[0]),
                writeable=True,
            )
        else:
            vector_path = internal_reproject_vector(in_vector, in_rasters[0],
                                                    output_vector)
            vector = open_vector(vector_path, writeable=True)

        vector_metadata = internal_vector_to_metadata(vector)
        vector_layer = vector.GetLayer()

    vector_projection = vector_metadata["projection_osr"]
    raster_projection = raster_metadata["projection"]

    # Read raster data in overlap
    raster_transform = np.array(raster_metadata["transform"], dtype=np.float32)
    raster_size = np.array(raster_metadata["size"], dtype=np.int32)

    raster_extent = get_extent(raster_transform, raster_size)

    vector_extent = np.array(vector_layer.GetExtent(), dtype=np.float32)
    overlap_extent = get_intersection(raster_extent, vector_extent)

    if overlap_extent is False:
        print("raster_extent: ", raster_extent)
        print("vector_extent: ", vector_extent)
        raise Exception("Vector and raster do not overlap!")

    (
        overlap_aligned_extent,
        overlap_aligned_rasterized_size,
        overlap_aligned_offset,
    ) = align_extent(raster_transform, overlap_extent, raster_size)
    overlap_transform = np.array(
        [
            overlap_aligned_extent[0],
            raster_transform[1],
            0,
            overlap_aligned_extent[3],
            0,
            raster_transform[5],
        ],
        dtype=np.float32,
    )
    overlap_size = overlap_size_calc(overlap_aligned_extent, raster_transform)

    # Loop the features
    vector_driver = ogr.GetDriverByName("Memory")
    vector_feature_count = vector_layer.GetFeatureCount()
    vector_layer.StartTransaction()

    # Create fields
    vector_layer_defn = vector_layer.GetLayerDefn()
    vector_field_counts = vector_layer_defn.GetFieldCount()
    vector_current_fields = []

    # Get current fields
    for i in range(vector_field_counts):
        vector_current_fields.append(
            vector_layer_defn.GetFieldDefn(i).GetName())

    # Add fields where missing
    for stat in stats:
        for i in range(len(in_rasters)):
            field_name = f"{prefixes[i]}{stat}"
            if field_name not in vector_current_fields:
                field_defn = ogr.FieldDefn(field_name, ogr.OFTReal)
                vector_layer.CreateField(field_defn)
                # Remember it so a repeated name is not created twice.
                vector_current_fields.append(field_name)

    rasterized_features = []
    sizes = np.zeros((vector_feature_count, 4), dtype="float32")
    offsets = np.zeros((vector_feature_count, 2), dtype=np.int32)
    raster_data = None
    for raster_index, raster_value in enumerate(in_rasters):

        fits_in_memory = True
        try:
            raster_data = raster_to_array(
                raster_value,
                crop=[
                    overlap_aligned_offset[0],
                    overlap_aligned_offset[1],
                    overlap_aligned_rasterized_size[0],
                    overlap_aligned_rasterized_size[1],
                ],
            )
        except Exception:
            fits_in_memory = False
            print("Raster does not fit in memory.. Doing IO for each feature.")

        for n in range(vector_feature_count):
            vector_feature = vector_layer.GetNextFeature()
            rasterized_vector = None

            # Masks, offsets and sizes are computed once (first raster) and
            # reused by index for the remaining rasters.
            if raster_index == 0:

                try:
                    vector_geom = vector_feature.GetGeometryRef()
                except Exception:
                    # Fall through to the explicit failure below instead of
                    # touching a geometry that was never assigned.
                    vector_geom = None

                if vector_geom is None:
                    raise Exception("Invalid geometry. Could not fix.")

                feature_extent = vector_geom.GetEnvelope()

                # Create temp layer
                temp_vector_datasource = vector_driver.CreateDataSource(
                    f"vector_{n}")
                temp_vector_layer = temp_vector_datasource.CreateLayer(
                    "temp_polygon", vector_projection, ogr.wkbPolygon)
                temp_vector_layer.CreateFeature(vector_feature.Clone())

                (
                    feature_aligned_extent,
                    feature_aligned_rasterized_size,
                    feature_aligned_offset,
                ) = align_extent(overlap_transform, feature_extent,
                                 overlap_size)

                # NOTE(review): called as (layer, extent, size, projection);
                # confirm which rasterize_vector this is meant to resolve to.
                rasterized_vector = rasterize_vector(
                    temp_vector_layer,
                    feature_aligned_extent,
                    feature_aligned_rasterized_size,
                    raster_projection,
                )
                rasterized_features.append(rasterized_vector)

                offsets[n] = feature_aligned_offset
                sizes[n] = feature_aligned_rasterized_size

            if fits_in_memory is True:
                cropped_raster = raster_data[offsets[n][1]:offsets[n][1] +
                                             int(sizes[n][1]),  # X
                                             offsets[n][0]:offsets[n][0] +
                                             int(sizes[n][0]),  # Y
                                             ]
            else:
                cropped_raster = raster_to_array(
                    raster_value,
                    crop=[
                        overlap_aligned_offset[0] + offsets[n][0],
                        overlap_aligned_offset[1] + offsets[n][1],
                        int(sizes[n][0]),
                        int(sizes[n][1]),
                    ],
                )

            if rasterized_features[n] is None:
                for stat in stats:
                    field_name = f"{prefixes[raster_index]}{stat}"
                    vector_feature.SetField(field_name, None)
            elif cropped_raster is None:
                for stat in stats:
                    field_name = f"{prefixes[raster_index]}{stat}"
                    vector_feature.SetField(field_name, None)
            else:
                raster_data_masked = np.ma.masked_array(
                    cropped_raster,
                    mask=rasterized_features[n],
                    dtype="float32").compressed()
                zonal_stats = calculate_array_stats(raster_data_masked,
                                                    stats_translated)

                for index, stat in enumerate(stats):
                    field_name = f"{prefixes[raster_index]}{stat}"
                    vector_feature.SetField(field_name,
                                            float(zonal_stats[index]))

                vector_layer.SetFeature(vector_feature)

            progress(n, vector_feature_count, name=prefixes[raster_index])

        # Rewind the layer cursor for the next raster.
        vector_layer.ResetReading()

    vector_layer.CommitTransaction()

    if output_vector is None:
        return vector

    return output_vector
def raster_to_grid(
    raster: Union[str, gdal.Dataset],
    grid: Union[str, ogr.DataSource],
    out_dir: str,
    use_field: Optional[str] = None,
    generate_vrt: bool = True,
    overwrite: bool = True,
    process_layer: int = 0,
    creation_options: list = [],
    verbose: int = 1,
) -> Union[List[str], Tuple[Optional[List[str]], Optional[str]]]:
    """Clips a raster to a grid. Generate .vrt.

    One raster is written per grid feature whose envelope intersects the
    raster extent.

    Args:
        raster (path | raster): The raster to split.

        grid (path | vector): The grid whose features define the tiles.

        out_dir (str): Output location. It is concatenated directly with the
        file names, so it must end with a path separator.

    **kwargs:
        use_field (str | None): Field whose value names each tile. If None,
        tiles are named "<raster name>_<fid>".

        generate_vrt (bool): If True, a "<raster name>.vrt" over all tiles is
        created with stack_rasters_vrt.

        overwrite (bool): Type-checked but otherwise unused by this function.

        process_layer (int): Layer index of the grid.

        creation_options (list): Passed through default_options to
        clip_raster.

        verbose (int): If truthy, prints intersection counts; if 1, also shows
        progress.

    Returns:
        A list of the generated raster paths, or a tuple (paths, vrt_path)
        when generate_vrt is True. With no intersections the list is empty and
        the vrt path is None.

    Raises:
        ValueError: If use_field is not a field of the grid layer.
        Exception: If the grid metadata cannot be parsed.
    """
    type_check(raster, [str, gdal.Dataset], "raster")
    type_check(grid, [str, ogr.DataSource], "grid")
    type_check(out_dir, [str], "out_dir")
    type_check(use_field, [str], "use_field", allow_none=True)
    type_check(generate_vrt, [bool], "generate_vrt")
    type_check(overwrite, [bool], "overwrite")
    type_check(process_layer, [int], "process_layer")
    type_check(creation_options, [list], "creation_options")
    type_check(verbose, [int], "verbose")

    use_grid = open_vector(grid)
    grid_metadata = internal_vector_to_metadata(use_grid)
    raster_metadata = raster_to_metadata(raster, create_geometry=True)

    # Reproject the grid to the projection of the raster if necessary.
    if not raster_metadata["projection_osr"].IsSame(grid_metadata["projection_osr"]):
        use_grid = reproject_vector(grid, raster_metadata["projection_osr"])
        grid_metadata = internal_vector_to_metadata(use_grid)

        if not isinstance(grid_metadata, dict):
            raise Exception("Error while parsing metadata.")

    # Only use the polygons in the grid that intersect the extent of the raster.
    use_grid = intersect_vector(use_grid, raster_metadata["extent_datasource"])

    ref = open_raster(raster)
    use_grid = open_vector(use_grid)

    layer = use_grid.GetLayer(process_layer)
    feature_count = layer.GetFeatureCount()
    raster_extent = raster_metadata["extent_ogr"]
    filetype = path_to_ext(raster)
    name = raster_metadata["name"]
    geom_type = grid_metadata["layers"][process_layer]["geom_type_ogr"]

    if use_field is not None:
        if use_field not in grid_metadata["layers"][process_layer]["field_names"]:
            names = grid_metadata["layers"][process_layer]["field_names"]
            raise ValueError(
                f"Requested field not found. Fields available are: {names}"
            )

    generated = []

    # For the sake of good reporting - lets first establish how many features intersect
    # the raster.

    if verbose:
        print("Finding intersections.")

    intersections = 0
    for _ in range(feature_count):
        feature = layer.GetNextFeature()
        geom = feature.GetGeometryRef()

        if not ogr_bbox_intersects(raster_extent, geom.GetEnvelope()):
            continue

        intersections += 1

    layer.ResetReading()

    if verbose:
        print(f"Found {intersections} intersections.")

    if intersections == 0:
        print("Warning: Found 0 intersections. Returning empty list.")
        # Keep the return shape tied to generate_vrt, as on the normal path.
        return ([], None) if generate_vrt else []

    # TODO: Replace this in gdal. 3.1
    driver = ogr.GetDriverByName("Esri Shapefile")

    # Copy so the shared default list can never be modified downstream.
    clip_creation_options = default_options(list(creation_options))

    clipped = 0
    for _ in range(feature_count):

        feature = layer.GetNextFeature()
        geom = feature.GetGeometryRef()

        if not ogr_bbox_intersects(raster_extent, geom.GetEnvelope()):
            continue

        if verbose == 1:
            progress(clipped, intersections - 1, "clip_grid")

        fid = feature.GetFID()

        # Each cell is written to its own in-memory shapefile that serves as
        # the clip geometry for this tile.
        test_ds_path = f"/vsimem/grid_{uuid4().int}.shp"
        test_ds = driver.CreateDataSource(test_ds_path)
        test_ds_lyr = test_ds.CreateLayer(
            "mem_layer_grid",
            geom_type=geom_type,
            srs=raster_metadata["projection_osr"],
        )
        test_ds_lyr.CreateFeature(feature.Clone())
        test_ds.FlushCache()

        if use_field is not None:
            out_name = f"{out_dir}{feature.GetField(use_field)}{filetype}"
        else:
            out_name = f"{out_dir}{name}_{fid}{filetype}"

        try:
            clip_raster(
                ref,
                test_ds_path,
                out_path=out_name,
                adjust_bbox=True,
                crop_to_geom=True,
                all_touch=False,
                postfix="",
                prefix="",
                creation_options=clip_creation_options,
                verbose=0,
            )
        finally:
            # Release and delete the temporary cell; otherwise one /vsimem
            # shapefile per grid cell stays in memory for the process lifetime.
            test_ds_lyr = None
            test_ds = None
            driver.DeleteDataSource(test_ds_path)

        generated.append(out_name)
        clipped += 1

    if generate_vrt:
        vrt_name = f"{out_dir}{name}.vrt"
        stack_rasters_vrt(generated, vrt_name, seperate=False)

        return (generated, vrt_name)

    return generated