def get_geometry_union(inpath, epsg, attribute_filter=None):
    """
    TODO: Remove this method and replace all references with get_geometry_unary_union

    Read every feature of a ShapeFile, reproject it and fold it into one
    geometry by successive pairwise unions.

    :param inpath: Path to a ShapeFile
    :param epsg: Desired output spatial reference
    :param attribute_filter: Optional attribute filter applied to the layer
        before the features are read
    :return: Single geometry of all unioned features, or None when no feature
        contributed a geometry
    """
    log = Logger('Shapefile')

    shp_driver = ogr.GetDriverByName("ESRI Shapefile")
    data_source = shp_driver.Open(inpath, 0)
    layer = data_source.GetLayer()
    in_spatial_ref = layer.GetSpatialRef()

    if attribute_filter:
        layer.SetAttributeFilter(attribute_filter)

    _out_spatial_ref, transform = get_transform_from_epsg(in_spatial_ref, epsg)

    progbar = ProgressBar(layer.GetFeatureCount(), 50, "Unioning features")
    geom = None
    for counter, feature in enumerate(layer, start=1):
        progbar.update(counter)

        ogr_geom = feature.GetGeometryRef()
        if ogr_geom is None:
            progbar.erase()  # get around the progressbar
            log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
            continue

        ogr_geom.Transform(transform)
        new_shape = wkbload(ogr_geom.ExportToWkb())

        try:
            # The first usable shape seeds the running union
            if geom:
                geom = geom.union(new_shape)
            else:
                geom = new_shape
        except Exception:
            # A failed union leaves the running result untouched
            progbar.erase()  # get around the progressbar
            log.warning('Union failed for shape with FID={} and will be ignored'.format(feature.GetFID()))

    progbar.finish()

    # Dropping the reference releases the OGR data source
    data_source = None

    return geom
def load_geometries(feature_class, id_field, epsg=None):
    """
    Load the features of a ShapeFile into a dictionary keyed by an attribute.

    Features that have no geometry, or whose geometry is empty, invalid,
    a zero-length line or a zero-area polygon, are logged and left out.

    :param feature_class: Path to a ShapeFile
    :param id_field: Name of the attribute whose value becomes the dictionary key
    :param epsg: Optional output spatial reference. When omitted (or falsy) the
        geometries are returned in the coordinates of the input layer
    :return: Dictionary of {id_field value: geometry loaded with wkbload}
    """
    log = Logger('Shapefile')

    # Get the input network
    driver = ogr.GetDriverByName('ESRI Shapefile')
    dataset = driver.Open(feature_class, 0)
    layer = dataset.GetLayer()
    in_spatial_ref = layer.GetSpatialRef()

    # Determine the transformation if user provides an EPSG
    transform = None
    if epsg:
        _out_spatial_ref, transform = get_transform_from_epsg(in_spatial_ref, epsg)

    features = {}

    progbar = ProgressBar(layer.GetFeatureCount(), 50, "Loading features")
    for counter, in_feature in enumerate(layer, start=1):
        progbar.update(counter)

        reach = in_feature.GetField(id_field)
        geom = in_feature.GetGeometryRef()

        # A feature without geometry cannot be transformed or exported
        if geom is None:
            progbar.erase()  # get around the progressbar
            log.warning('Feature with FID={} has no geometry. Skipping'.format(in_feature.GetFID()))
            continue

        # Optional coordinate transformation
        if transform:
            geom.Transform(transform)

        # The geometry type must be read from the OGR geometry: the object
        # returned by wkbload exposes is_empty / is_valid / length / area and
        # has no GetGeometryType(), Length() or Area() methods.
        geo_type = geom.GetGeometryType()
        new_geom = wkbload(geom.ExportToWkb())

        if new_geom.is_empty:
            progbar.erase()  # get around the progressbar
            log.warning('Empty feature with FID={} cannot be unioned and will be ignored'.format(in_feature.GetFID()))
        elif not new_geom.is_valid:
            progbar.erase()  # get around the progressbar
            log.warning('Invalid feature with FID={} cannot be unioned and will be ignored'.format(in_feature.GetFID()))
        # Filter out zero-length lines
        elif geo_type in LINE_TYPES and new_geom.length == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Length for feature with FID={}'.format(in_feature.GetFID()))
        # Filter out zero-area polys
        elif geo_type in POLY_TYPES and new_geom.area == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Area for feature with FID={}'.format(in_feature.GetFID()))
        else:
            features[reach] = new_geom

    progbar.finish()

    # Dropping the reference releases the OGR data source
    dataset = None

    return features
def merge_geometries(feature_classes, epsg):
    """
    Collect the geometries of several ShapeFiles into one OGR multi line string.

    Each geometry is reprojected and then added to the collection as-is; no
    union or dissolve is performed.

    :param feature_classes: Sequence of ShapeFile paths
    :param epsg: Desired output spatial reference
    :return: ogr.Geometry of type wkbMultiLineString holding every added geometry
    """
    log = Logger('Shapefile')

    shp_driver = ogr.GetDriverByName("ESRI Shapefile")
    union = ogr.Geometry(ogr.wkbMultiLineString)

    for fccount, fc in enumerate(feature_classes, start=1):
        log.info("Merging Geometries for feature class {}/{}".format(fccount, len(feature_classes)))

        data_source = shp_driver.Open(fc, 0)
        layer = data_source.GetLayer()

        # Every input gets its own transform because the inputs may use different projections
        _out_spatial_ref, transform = get_transform_from_epsg(layer.GetSpatialRef(), epsg)

        progbar = ProgressBar(layer.GetFeatureCount(), 50, "Merging Geometries")
        for counter, feature in enumerate(layer, start=1):
            progbar.update(counter)

            geom = feature.GetGeometryRef()
            if geom is None:
                progbar.erase()  # get around the progressbar
                log.warning('Feature with FID={} has no geoemtry. Skipping'.format(feature.GetFID()))
                continue

            geom.Transform(transform)
            union.AddGeometry(geom)

        progbar.finish()

        # Dropping the reference releases the OGR data source
        data_source = None

    return union
def extract_mean_values_by_polygon(polys, rasters, reference_raster):
    """
    Summarise raster arrays underneath each polygon.

    Every polygon is rasterized onto the grid (shape and transform) of
    reference_raster with all_touched=True. The resulting mask is applied to
    each array in rasters to obtain the mean and the unique values with counts.

    :param polys: Dictionary of {reach id: polygon geometry}
    :param rasters: Dictionary of {key: array or None}. A None entry produces
        a mean of 0.0 and an empty unique list. NOTE(review): arrays are
        presumably on the same grid as reference_raster - confirm with callers
    :param reference_raster: Path of the raster that supplies the grid
    :return: Tuple (output_mean, output_unique), both keyed by reach id and then
        by raster key. Polygons that are not (Multi)Polygons with positive area
        are logged and omitted from both dictionaries
    """
    log = Logger('extract_mean_values_by_polygon')

    progbar = ProgressBar(len(polys), 50, "Extracting Mean values...")

    output_mean = {}
    output_unique = {}

    with rasterio.open(reference_raster) as dataset:
        for counter, (reachid, poly) in enumerate(polys.items(), start=1):
            progbar.update(counter)

            usable = poly.geom_type in ["Polygon", "MultiPolygon"] and poly.area > 0
            if not usable:
                progbar.erase()
                log.warning(f"Reach: {reachid} | WARNING no geom")
                continue

            # Cells outside the polygon are filled with NaN and therefore masked
            burned = features.rasterize(
                [poly],
                out_shape=dataset.shape,
                transform=dataset.transform,
                all_touched=True,
                fill=np.nan)
            reach_raster = np.ma.masked_invalid(burned)

            values_mean = {}
            values_unique = {}
            for key, raster in rasters.items():
                if raster is None:
                    values_mean[key] = 0.0
                    values_unique[key] = []
                    continue

                current_raster = np.ma.masked_array(raster, mask=reach_raster.mask)
                values_mean[key] = np.ma.mean(current_raster)
                # Masked cells are counted under the value 0
                filled = np.ma.filled(current_raster, fill_value=0)
                values_unique[key] = np.unique(filled, return_counts=True)

            output_mean[reachid] = values_mean
            output_unique[reachid] = values_unique

    progbar.finish()
    return output_mean, output_unique
def merge_feature_classes(feature_classes, epsg, boundary, outpath):
    """
    Merge several ShapeFiles into a single, reprojected ShapeFile.

    Only features that pass a spatial filter built from boundary are copied.
    The output layer is created from the first input; fields of later inputs
    are appended when the output does not already have a field of that name.
    Nothing is done when outpath already exists.

    :param feature_classes: Sequence of input ShapeFile paths
    :param epsg: Desired output spatial reference
    :param boundary: Geometry exposing a .wkb attribute, used as spatial filter
        on every input layer. NOTE(review): the filter is applied to the input
        layer before reprojection, so it presumably has to be in the input
        coordinates - confirm with callers
    :param outpath: Path of the ShapeFile to create
    :return: None
    """
    log = Logger('Shapefile')

    if os.path.isfile(outpath):
        log.info('Skipping merging feature classes because file exists.')
        return

    safe_makedirs(os.path.dirname(outpath))

    log.info('Merging {} feature classes.'.format(len(feature_classes)))

    driver = ogr.GetDriverByName("ESRI Shapefile")

    # Create the output shapefile
    outDataSource = driver.CreateDataSource(outpath)
    outLayer = None
    outSpatialRef = None

    for fccount, inpath in enumerate(feature_classes, start=1):
        log.info("Merging feature class {}/{}".format(fccount, len(feature_classes)))

        inDataSource = driver.Open(inpath, 0)
        inLayer = inDataSource.GetLayer()
        inSpatialRef = inLayer.GetSpatialRef()
        inLayer.SetSpatialFilter(ogr.CreateGeometryFromWkb(boundary.wkb))

        # First input spatial ref sets the SRS for the output file
        outSpatialRefTmp, transform = get_transform_from_epsg(inSpatialRef, epsg)
        if outLayer is None:
            outSpatialRef = outSpatialRefTmp
            outLayer = outDataSource.CreateLayer('network', outSpatialRef, geom_type=ogr.wkbMultiLineString)
        outLayerDefn = outLayer.GetLayerDefn()

        # Transfer fields over
        inLayerDef = inLayer.GetLayerDefn()
        in_field_names = []
        for i in range(inLayerDef.GetFieldCount()):
            inFieldDef = inLayerDef.GetFieldDefn(i)
            in_field_names.append(inFieldDef.GetName())
            # Only create fields if we really don't have them
            # NOTE: THIS ASSUMES ALL FIELDS OF THE SAME NAME HAVE THE SAME TYPE
            if outLayerDefn.GetFieldIndex(inFieldDef.GetName()) == -1:
                outLayer.CreateField(inFieldDef)

        progbar = ProgressBar(inLayer.GetFeatureCount(), 50, "Processing features")

        # Re-read the definition so it includes the fields that were just added
        outLayerDefn = outLayer.GetLayerDefn()

        for counter, feature in enumerate(inLayer, start=1):
            progbar.update(counter)

            geom = feature.GetGeometryRef()
            if geom is None:
                progbar.erase()  # get around the progressbar
                log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
                continue

            geom.Transform(transform)

            outFeature = ogr.Feature(outLayerDefn)

            # Copy attributes by the INPUT field name. The position of a field
            # in the output layer only matches its position in the input layer
            # for the first feature class; later inputs may order their fields
            # differently or contribute new ones.
            for i, field_name in enumerate(in_field_names):
                outFeature.SetField(field_name, feature.GetField(i))

            outFeature.SetGeometry(geom)
            outLayer.CreateFeature(outFeature)

            # Release the OGR feature objects
            feature = None
            outFeature = None

        progbar.finish()
        inDataSource = None

    # Dropping the reference flushes and closes the output shapefile
    outDataSource = None
    log.info('Merge complete.')
def copy_feature_class(inpath, epsg, outpath, clip_shape=None, attribute_filter=None):
    """Copy a Shapefile from one location to another

    The geometries are reprojected as they are copied. Features can be limited
    with an attribute filter and with a spatial filter built from a clip
    geometry. The clip geometry only selects features; the copied geometries
    are not cut to it.

    Arguments:
        inpath {str} -- File path to input Shapefile that will be copied.
        epsg {int} -- Output coordinate system
        outpath {str} -- File path where the output Shapefile will be generated.

    Keyword Arguments:
        clip_shape {shape} -- Geometry exposing a .wkb attribute, used as a spatial
            filter on the input layer (default: {None})
        attribute_filter {str} -- Attribute filter used to limit the input features
            that will be copied. (default: {None})
    """
    log = Logger('Shapefile')

    shp_driver = ogr.GetDriverByName("ESRI Shapefile")

    src_datasource = shp_driver.Open(inpath, 0)
    src_layer = src_datasource.GetLayer()
    src_spatial_ref = src_layer.GetSpatialRef()
    geom_type = src_layer.GetGeomType()

    # Optionally limit which features are copied by using an attribute filter
    if attribute_filter:
        src_layer.SetAttributeFilter(attribute_filter)

    # A spatial filter keeps only the features that intersect (partially or
    # entirely) the clip geometry. It uses the ShapeFile's spatial index,
    # which is much faster than testing every feature manually.
    clip_geom = None
    if clip_shape:
        clip_geom = ogr.CreateGeometryFromWkb(clip_shape.wkb)
        src_layer.SetSpatialFilter(clip_geom)

    safe_makedirs(os.path.dirname(outpath))

    # Create the output shapefile
    dst_spatial_ref, transform = get_transform_from_epsg(src_spatial_ref, epsg)
    dst_datasource = shp_driver.CreateDataSource(outpath)
    dst_layer = dst_datasource.CreateLayer('network', dst_spatial_ref, geom_type=geom_type)

    # Replicate every input field on the output layer
    src_layer_defn = src_layer.GetLayerDefn()
    for field_idx in range(0, src_layer_defn.GetFieldCount()):
        dst_layer.CreateField(src_layer_defn.GetFieldDefn(field_idx))

    # Fetch the definition after the fields were created so it includes them
    dst_layer_defn = dst_layer.GetLayerDefn()

    progbar = ProgressBar(src_layer.GetFeatureCount(), 50, "Copying features")
    for counter, feature in enumerate(src_layer, start=1):
        progbar.update(counter)

        geom = feature.GetGeometryRef()
        if geom is None:
            progbar.erase()  # get around the progressbar
            log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
            continue

        geom.Transform(transform)

        # Create output Feature
        dst_feature = ogr.Feature(dst_layer_defn)
        dst_feature.SetGeometry(geom)

        # Add field values from input Layer
        for field_idx in range(0, dst_layer_defn.GetFieldCount()):
            field_name = dst_layer_defn.GetFieldDefn(field_idx).GetNameRef()
            dst_feature.SetField(field_name, feature.GetField(field_idx))

        dst_layer.CreateFeature(dst_feature)
        dst_feature = None

    progbar.finish()

    # Dropping the references closes both data sources
    src_datasource = None
    dst_datasource = None
def get_geometry_unary_union_from_wkt(inpath, to_sr_wkt):
    """
    Load all features from a ShapeFile and union them together into a single geometry

    Invalid geometries get one repair attempt with a zero-width buffer. Features
    with no geometry, zero-length lines and zero-area polygons are logged and
    skipped. To bound the size of the working list, the geometries collected so
    far are unioned every time the list reaches 500 entries.

    :param inpath: Path to a ShapeFile
    :param to_sr_wkt: Desired output spatial reference as WKT
    :return: Single geometry of all unioned features, or None when no usable
        geometry was found
    """
    log = Logger('Unary Union')

    driver = ogr.GetDriverByName("ESRI Shapefile")
    data_source = driver.Open(inpath, 0)
    layer = data_source.GetLayer()
    in_spatial_ref = layer.GetSpatialRef()

    _out_spatial_ref, transform = get_transform_from_wkt(in_spatial_ref, to_sr_wkt)

    fcount = layer.GetFeatureCount()
    progbar = ProgressBar(fcount, 50, "Unary Unioning features")

    def unionize(wkb_lst):
        """Union a list of WKB geometries and return the result as WKB."""
        return unary_union([wkbload(g) for g in wkb_lst]).wkb

    geom_list = []
    for counter, feature in enumerate(layer, start=1):
        progbar.update(counter)

        new_geom = feature.GetGeometryRef()

        # This check must come first: every call below dereferences new_geom
        if new_geom is None:
            progbar.erase()  # get around the progressbar
            log.warning('Feature with FID={} has no geometry. Skipping'.format(feature.GetFID()))
            continue

        geo_type = new_geom.GetGeometryType()

        # We can't union non-valid shapes but sometimes a buffer by 0 can help
        if not new_geom.IsValid():
            progbar.erase()  # get around the progressbar
            log.warning('Invalid shape with FID={} trying the Buffer0 technique...'.format(feature.GetFID()))
            try:
                new_geom = new_geom.Buffer(0)
            except Exception:
                progbar.erase()  # get around the progressbar
                log.warning('Exception raised during buffer 0 technique. skipping this file')
                continue
            # Buffer may hand back nothing at all; treat that like a failed repair
            if new_geom is None or not new_geom.IsValid():
                progbar.erase()  # get around the progressbar
                log.warning(' Still invalid. Skipping this geometry')
                continue

        # Filter out zero-length lines
        if geo_type in LINE_TYPES and new_geom.Length() == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Length for shape with FID={}'.format(feature.GetFID()))
        # Filter out zero-area polys
        elif geo_type in POLY_TYPES and new_geom.Area() == 0:
            progbar.erase()  # get around the progressbar
            log.warning('Zero Area for shape with FID={}'.format(feature.GetFID()))
        else:
            new_geom.Transform(transform)
            geom_list.append(new_geom.ExportToWkb())

            # IF we get past a certain size then run the union
            if len(geom_list) >= 500:
                geom_list = [unionize(geom_list)]

    log.debug('finished iterating with list of size: {}'.format(len(geom_list)))
    progbar.finish()

    if len(geom_list) == 0:
        log.warning('No geometry found to union')
        return None

    if len(geom_list) > 1:
        log.debug('Starting final union of geom_list of size: {}'.format(len(geom_list)))
        # Do a final union to clean up anything that might still be in the list
        geom_union = wkbload(unionize(geom_list))
    else:
        log.debug('FINAL Unioning geom_list of size {}'.format(len(geom_list)))
        geom_union = wkbload(geom_list[0])
        log.debug(' done')

    print_geom_size(log, geom_union)
    log.debug('Complete')

    # Dropping the reference releases the OGR data source
    data_source = None
    return geom_union
def _dump_modis_debug_files(log, out_sqlite, huc_id, nhd_id, bbox, catch_poly, geoms, ave, modis_dates, modis_array_sds):
    """Write a debug GeoJSON and CSV report for one reach next to the output database.

    Every entry of geoms must already carry the 'weight', 'raster_coords' and
    'world_coords' properties that process_modis adds when debugging.
    """
    file_prefix = '{}-{}-debug'.format(huc_id, nhd_id)
    log.debug('Dropping files: {}'.format(file_prefix))
    out_dir = os.path.dirname(out_sqlite)

    # Dump some useful shapes to a geojson Object
    _debug_shape = DebugGeoJSON(os.path.join(out_dir, '{}.geojson'.format(file_prefix)))
    _debug_shape.add_shapely(bbox, {"name": "bbox"})
    _debug_shape.add_shapely(catch_poly, {"name": "catch_poly"})
    for gj in geoms:
        _debug_shape.add_geojson(gj)
    _debug_shape.write()

    # Now dump a CSV array report
    csv_path = os.path.join(out_dir, '{}.csv'.format(file_prefix))
    # newline='' lets the csv module control line endings itself
    with open(csv_path, 'w', newline='') as csv_handle:
        csvw = csv.writer(csv_handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csvw.writerow(['HUC', 'NHDPlusID', 'Area'])
        csvw.writerow([huc_id, nhd_id, catch_poly.area])
        csvw.writerow([])

        # Summary of intersected pixels
        debug_weights = [
            (geom['properties']['weight'], geom['properties']['raster_coords'])
            for geom in geoms
        ]

        # Dump the weights Cell values so we can use excel to calculate them manually
        csvw.writerow(['Intersecting Cells:'] + [' ' for g in geoms])
        for key, name in {
                'raster_val': 'cell_id',
                'raster_coords': '[row,col]',
                'world_coords': '[x,y]',
                'weight': 'weight'
        }.items():
            csvw.writerow([name] + [g['properties'][key] for g in geoms])

        csvw.writerow([])
        csvw.writerow(['Date'] + [' ' for g in geoms] + ['np.ma.average'])
        for didx, ave_val in enumerate(ave):
            csvw.writerow(
                [modis_dates[didx]]
                + [modis_array_sds[didx][w[1][0]][w[1][1]] for w in debug_weights]
                + [ave_val])


def process_modis(out_sqlite, modis_folder, nhd_folder, verbose, debug_flag):
    """Generate land surface temperature sqlite db from NHD+ and MODIS data

    For every HUC in the 'WBDHU8_reproject' layer, all '*.tif' scenes in
    modis_folder are cropped to the HUC envelope, zero cells are masked, and
    the values are scaled by 0.02 and shifted by -273.15 (K to C). For every
    catchment of the HUC an area-weighted average per scene is computed, where
    the weight of a pixel is the share of the catchment area that it covers.
    One row per (catchment, scene) is inserted into the MODIS_LST table.

    Arguments:
        out_sqlite {str} -- Path of the sqlite database to create. An existing
            file is deleted first.
        modis_folder {str} -- Folder holding the MODIS '*.tif' scenes. The file
            name without a leading 'A' and without extension is parsed as a
            '%Y%j' date.
        nhd_folder {str} -- Path opened with the OpenFileGDB driver; must hold the
            'WBDHU8_reproject' and 'NHDPlusCatchment_reproject' layers.
        verbose -- Not used by this function.
        debug_flag {bool} -- When True, a GeoJSON and a CSV debug report are
            written for every 5000th reach next to out_sqlite.
    """
    log = Logger("Process LST")

    if os.path.isfile(out_sqlite):
        os.remove(out_sqlite)

    # Create sqlite database
    conn = sqlite3.connect(out_sqlite)
    try:
        cursor = conn.cursor()

        # test if table exists?
        cursor.execute(
            """SELECT COUNT(name) FROM sqlite_master WHERE type='table' AND name='MODIS_LST' """
        )
        log.info('Creating DB')
        if cursor.fetchone()[0] == 0:
            cursor.execute("""
                CREATE TABLE MODIS_LST (
                    NHDPlusID INTEGER NOT NULL,
                    MODIS_Scene DATETIME NOT NULL,
                    LST REAL,
                    PRIMARY KEY ( NHDPlusID, MODIS_Scene )
                ) WITHOUT ROWID;
                """)
            conn.commit()

        # populate list of modis files
        modis_files = glob.glob(os.path.join(modis_folder, "*.tif"))

        # Load NHD Layers
        log.info(f"Processing NHD Data: {nhd_folder}")
        in_driver = ogr.GetDriverByName("OpenFileGDB")
        in_datasource = in_driver.Open(nhd_folder, 0)
        layer_hucs = in_datasource.GetLayer(r"WBDHU8_reproject")

        # Process HUC
        huc_counter = 0
        total_hucs = layer_hucs.GetFeatureCount()
        for huc in layer_hucs:
            huc_counter += 1
            huc_id = huc.GetField(r"HUC8")
            log.info('Processing huc:{} ({}/{})'.format(huc_id, huc_counter, total_hucs))
            log.info(f"HUC: {huc_id}")
            huc_geom = huc.GetGeometryRef()

            layer_catchments = None
            layer_catchments = in_datasource.GetLayer(r"NHDPlusCatchment_reproject")
            # A spatial filter on huc_geom is not used because the catchments
            # are not perfectly aligned with hucs
            layer_catchments.SetAttributeFilter(f"""HUC8 = {huc_id}""")

            # GetEnvelope returns (minx, maxx, miny, maxy); box expects (minx, miny, maxx, maxy)
            huc_bounds = huc_geom.GetEnvelope()
            bbox = box(huc_bounds[0], huc_bounds[2], huc_bounds[1], huc_bounds[3])

            # open a single MODIS raster and load its projection and transform based on current huc
            with rasterio.open(f"{modis_files[0]}") as dataset:
                data, modis_transform = mask(dataset, [bbox], all_touched=True, crop=True)
                # Assuming there is only one band we can drop the first dimension
                # and get (36,78) instead of (1,36,78)
                modis_shape = data.shape[1:]

            # Read all MODIS Scenes into array
            modis_array_raw = np.ma.array([load_cropped_raster(image, bbox) for image in modis_files])
            # Make sure we mask out the invalid data
            modis_array_sds = np.ma.masked_where(modis_array_raw == 0, modis_array_raw)
            modis_array_K = modis_array_sds * 0.02
            modis_array_C = modis_array_K - 273.15  # K to C

            # Generate list of MODIS scene dates.
            # splitext removes the extension as a suffix; str.rstrip(".tif")
            # would strip any trailing run of the characters '.', 't', 'i', 'f'.
            modis_dates = np.array([
                os.path.splitext(os.path.basename(image))[0].lstrip("A")
                for image in modis_files
            ])

            # Calculate average LST per Catchment Layer
            progbar = ProgressBar(layer_catchments.GetFeatureCount(), 50, 'Processing HUC: {}'.format(huc_id))
            reach_counter = 0
            progbar.update(reach_counter)

            for reach in layer_catchments:
                reach_counter += 1
                progbar.update(reach_counter)

                # If debug flag is set then drop a CSV for every 5000 reaches
                debug_drop = debug_flag is True and reach_counter % 5000 == 1

                nhd_id = int(reach.GetField("NHDPlusID"))

                # load_catchment_polygon and transform to raster SRS
                reach_geom = reach.GetGeometryRef()
                catch_poly = loads(reach_geom.ExportToWkb())

                # Catchment polygons are vectorized rasters and they can have invalid geometries
                if not catch_poly.is_valid:
                    log.warning('Invalid catchment polygon detected. Trying the buffer technique: {}'.format(nhd_id))
                    catch_poly = catch_poly.buffer(0)

                # Generate mask raster of catchment pixels
                reach_raster = np.ma.masked_invalid(
                    rasterio.features.rasterize([catch_poly],
                                                out_shape=modis_shape,
                                                transform=modis_transform,
                                                all_touched=True,
                                                fill=np.nan))

                # Now assign ascending integers to each cell. This is so the
                # rasterio.features.shapes gives us a unique shape for every cell
                reach_raster_idx = np.ma.masked_array(
                    np.arange(modis_shape[0] * modis_shape[1], dtype=np.int32).reshape(modis_shape),  # pylint: disable=E1101
                    reach_raster.mask)

                # Generate a unique shape for each valid pixel
                geoms = [{
                    'properties': {
                        'name': 'modis_pixel',
                        'raster_val': int(v),
                        'valid': v > 0
                    },
                    'geometry': geom
                } for geom, v in rasterio.features.shapes(reach_raster_idx, transform=modis_transform)
                    if test_pixel_geom(geom)]

                # Now create our weights array. Start with weights of 0 so we can rule out any weird points
                weights_raster_arr = np.ma.masked_array(
                    np.full(modis_shape, 0, dtype=np.float32),  # pylint: disable=E1101
                    reach_raster.mask,
                )

                for geom in geoms:
                    pxl = shape(geom['geometry'])
                    poly_intersect = pxl.intersection(catch_poly)
                    idx, idy = find_indeces(geom['properties']['raster_val'], modis_shape)
                    # Weight is the share of the catchment covered by this pixel
                    weight = poly_intersect.area / catch_poly.area

                    # For debugging
                    if debug_drop:
                        geom['type'] = "Feature"
                        geom['properties']['weight'] = weight
                        geom['properties']['raster_coords'] = [idx, idy]
                        geom['properties']['world_coords'] = [
                            pxl.centroid.coords[0][0],
                            pxl.centroid.coords[0][1]
                        ]

                    weights_raster_arr[idx][idy] = weight

                # Calculate average weighted modis; one value per scene
                ave = np.ma.average(modis_array_C,
                                    axis=(1, 2),
                                    weights=np.broadcast_to(weights_raster_arr, modis_array_C.shape))

                # Just some useful debugging stuff
                if debug_drop:
                    progbar.erase()
                    _dump_modis_debug_files(log, out_sqlite, huc_id, nhd_id, bbox, catch_poly,
                                            geoms, ave, modis_dates, modis_array_sds)

                # insert_lst_into_sqlite; an average of exactly 0 is stored as NULL
                cursor.executemany("""INSERT INTO MODIS_LST VALUES(?,?,?)""", [
                    (nhd_id,
                     datetime.datetime.strptime(modis_date, "%Y%j").date(),
                     float(v) if float(v) != 0 else None)
                    for (modis_date, v) in zip(modis_dates, ave.data)
                ])

                # Write data to sqlite after each reach
                conn.commit()
    finally:
        # Close database connection, also when processing fails part-way
        conn.close()

    return