def test_bbox_offbyone():
    """Guard against off-by-one errors in bbox_to_pixel_offsets."""
    # Case 1: offsets[1] + offsets[3] must equal the second entry of the
    # raster size passed to the helper.
    geotransform = (-4418000.0, 250.0, 0.0, 4876500.0, 0.0, -250.0)
    bounds = [4077943.9961, -3873500.0, 4462000.0055, -3505823.7582]
    raster_size = (37000, 35000)
    offsets = bbox_to_pixel_offsets(geotransform, bounds, raster_size)
    assert offsets[1] + offsets[3] == raster_size[1]

    # Case 2: based on
    # https://github.com/perrygeo/python-raster-stats/issues/46
    geotransform = (151.2006, 0.025, 0.0, -25.4896, 0.0, -0.025)
    bounds = [
        153.39775866026284,
        -28.903022885889843,
        153.51344076545288,
        -28.80117672778147,
    ]
    raster_size = (92, 135)
    # The window should be 5 pixels wide; rounding errors must not make it 6.
    expected = (87, 132, 5, 3)
    assert bbox_to_pixel_offsets(geotransform, bounds, raster_size) == expected
def test_bbox_offbyone():
    """Guard against an off-by-one error in the computed source offsets."""
    # NOTE(review): other call sites in this file pass the raster size as a
    # third argument to bbox_to_pixel_offsets -- confirm which signature is
    # current before relying on this test.
    geotransform = (-4418000.0, 250.0, 0.0, 4876500.0, 0.0, -250.0)
    bounds = [4077943.9961, -3873500.0, 4462000.0055, -3505823.7582]
    offsets = bbox_to_pixel_offsets(geotransform, bounds)
    raster_size = (37000, 35000)
    # offsets[1] + offsets[3] must equal the second raster dimension.
    assert offsets[1] + offsets[3] == raster_size[1]
def test_bbox_offbyone():
    """Guard against off-by-one errors in bbox_to_pixel_offsets."""
    # Case 1: offsets[1] + offsets[3] must equal the second entry of the
    # raster size passed to the helper.
    geotransform = (-4418000.0, 250.0, 0.0, 4876500.0, 0.0, -250.0)
    bounds = [4077943.9961, -3873500.0, 4462000.0055, -3505823.7582]
    raster_size = (37000, 35000)
    offsets = bbox_to_pixel_offsets(geotransform, bounds, raster_size)
    assert offsets[1] + offsets[3] == raster_size[1]

    # Case 2: based on
    # https://github.com/perrygeo/python-raster-stats/issues/46
    geotransform = (151.2006, 0.025, 0.0, -25.4896, 0.0, -0.025)
    bounds = [
        153.39775866026284,
        -28.903022885889843,
        153.51344076545288,
        -28.80117672778147,
    ]
    raster_size = (92, 135)
    # The window should be 5 pixels wide; rounding errors must not make it 6.
    expected = (87, 132, 5, 3)
    assert bbox_to_pixel_offsets(geotransform, bounds, raster_size) == expected
def masked_subset(rds, geom, all_touched=False):
    """
    Return a subset of rds where the extent is the bounding box of geom and
    all cells outside of geom are masked.

    Parameters
    ----------
    rds : OpticalRS.RasterDS
        The raster dataset that you want to subset
    geom : shapely geometry
        The polygon bounding the area of interest.
    all_touched : bool
        Forwarded to `mask_from_geom`, which passes it on to the rasterizer
        as the `ALL_TOUCHED` option.

    Returns
    -------
    numpy.ma.MaskedArray
        A numpy masked array of shape (Rows,Columns,Bands). Cells not within
        geom will be masked as will any values that were masked in rds.

    Raises
    ------
    ValueError
        If `mask_from_geom` returns None, which it does when the computed
        pixel window has a non-positive width or height.
    """
    # Pixel window (unpacked into band_array_subset) covering geom's bounds.
    src_offset = bbox_to_pixel_offsets(
        rds.gdal_ds.GetGeoTransform(),
        list(geom.bounds),
        (rds.gdal_ds.RasterXSize, rds.gdal_ds.RasterYSize),
    )
    barr = rds.band_array_subset(*src_offset)

    nodata_value = rds.gdal_ds.GetRasterBand(1).GetNoDataValue()
    geom_mask = mask_from_geom(geom, rds, band_num=1, epsg=rds.epsg,
                               nodata_value=nodata_value,
                               all_touched=all_touched)
    if geom_mask is None:
        raise ValueError("geom does not overlap the raster; nothing to mask")

    # Assign the combined mask on `barr` itself rather than on per-band
    # slices: when `barr` carries no mask array (numpy.ma.nomask), a mask
    # set on a temporary slice is never propagated back to `barr`.
    # The 2-D geometry mask is broadcast across the trailing band axis.
    barr.mask = np.ma.getmaskarray(barr) | geom_mask[:, :, np.newaxis]
    return barr
def masked_subset(rds, geom, all_touched=False):
    """
    Return a subset of rds where the extent is the bounding box of geom and
    all cells outside of geom are masked.

    Parameters
    ----------
    rds : OpticalRS.RasterDS
        The raster dataset that you want to subset
    geom : shapely geometry
        The polygon bounding the area of interest.
    all_touched : bool
        Forwarded to `mask_from_geom`, which passes it on to the rasterizer
        as the `ALL_TOUCHED` option.

    Returns
    -------
    numpy.ma.MaskedArray
        A numpy masked array of shape (Rows,Columns,Bands). Cells not within
        geom will be masked as will any values that were masked in rds.

    Raises
    ------
    ValueError
        If `mask_from_geom` returns None, which it does when the computed
        pixel window has a non-positive width or height.
    """
    # Pixel window (unpacked into band_array_subset) covering geom's bounds.
    src_offset = bbox_to_pixel_offsets(
        rds.gdal_ds.GetGeoTransform(),
        list(geom.bounds),
        (rds.gdal_ds.RasterXSize, rds.gdal_ds.RasterYSize),
    )
    barr = rds.band_array_subset(*src_offset)

    nodata_value = rds.gdal_ds.GetRasterBand(1).GetNoDataValue()
    geom_mask = mask_from_geom(geom, rds, band_num=1, epsg=rds.epsg,
                               nodata_value=nodata_value,
                               all_touched=all_touched)
    if geom_mask is None:
        raise ValueError("geom does not overlap the raster; nothing to mask")

    # Assign the combined mask on `barr` itself rather than on per-band
    # slices: when `barr` carries no mask array (numpy.ma.nomask), a mask
    # set on a temporary slice is never propagated back to `barr`.
    # The 2-D geometry mask is broadcast across the trailing band axis.
    barr.mask = np.ma.getmaskarray(barr) | geom_mask[:, :, np.newaxis]
    return barr
def zonal_stats(vectors, raster, layer_num=0, band_num=1, func=None,
                nodata_value=None, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):
    """
    Compute per-feature statistics over an in-memory raster array.

    Every feature is rasterized in memory onto the window of `raster`
    covered by its (raster-clipped) bounding box. Statistics are computed
    over the cells that are burned by the feature and are not equal to
    `nodata_value`.

    Parameters
    ----------
    vectors : object
        Vector source; handed to `get_features` together with `layer_num`.
    raster : numpy.ndarray
        Array whose first two axes are (rows, columns).
    layer_num : int
        Layer index forwarded to `get_features`.
    band_num : int
        Unused here; kept for interface compatibility.
    func : callable, optional
        Custom statistic. When supplied it is called with the masked array
        of each overlapping feature and stored under `func.__name__`.
    nodata_value : number, optional
        Cells equal to this value are masked out.
    categorical : bool
        When true and `stats` is empty, no default statistics are used.
    stats : list of str or str, optional
        Names of the statistics to compute ('min', 'max', 'mean', 'count',
        'std', 'sum', 'median', 'range', 'func'). A whitespace separated
        string is accepted. Defaults to count/min/max/mean/std unless
        `categorical` is set. The caller's list is never modified.
    copy_properties : bool
        Copy the feature's 'properties' into its result dict.
    all_touched : bool
        Passed to the rasterizer as the `ALL_TOUCHED` option.
    transform : sequence
        Geotransform of `raster`. Required.

    Returns
    -------
    results : list of dict
        One dict per feature with the requested statistics plus '__fid__'
        (the enumeration index). Features whose pixel window is empty get
        None for every requested statistic.
    entity_images : list of dict
        One `{'__fid__': i, 'img': masked_array_or_None}` per feature.

    Raises
    ------
    ValueError
        If 'func' is requested in `stats` but no `func` was given.
    Exception
        If `transform` is not provided.
    """
    # Resolve the statistics on a private copy so that a list passed in by
    # the caller is never mutated, and so that `stats` is always iterable
    # (it used to stay None for categorical=True without explicit stats).
    if isinstance(stats, str):
        stats = stats.split()
    elif stats:
        stats = list(stats)
    elif categorical:
        stats = []
    else:
        stats = ['count', 'min', 'max', 'mean', 'std']
    if func is not None and 'func' not in stats:
        stats.append('func')
    if 'func' in stats and func is None:
        raise ValueError("The 'func' stat requires the 'func' kwarg")

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform

    rsize = (raster.shape[1], raster.shape[0])
    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, _, spatial_ref = get_features(vectors, layer_num)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Half a cell width (rgt[1]); used to turn points into cell-sized boxes.
    buff = rgt[1] / 2.0
    rasterize_options = [
        'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False']

    results = []
    entity_images = []
    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        if geom.type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping
        # polys
        minx, miny, maxx, maxy = geom.bounds
        geom_bounds = [
            rbounds[0] if minx < rbounds[0] else minx,
            rbounds[1] if miny < rbounds[1] else miny,
            rbounds[2] if maxx > rbounds[2] else maxx,
            rbounds[3] if maxy > rbounds[3] else maxy,
        ]

        # NOTE(review): other call sites in this file pass the raster size
        # as a third argument to bbox_to_pixel_offsets -- confirm which
        # signature is current.
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        xoff, yoff, xsize, ysize = src_offset

        if xsize <= 0 or ysize <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = {s: None for s in stats}
            img = {'__fid__': i, 'img': None}
        else:
            # geotransform of the feature's window
            new_gt = (
                rgt[0] + (xoff * rgt[1]),
                rgt[1],
                0.0,
                rgt[3] + (yoff * rgt[5]),
                0.0,
                rgt[5],
            )
            # `raster` is already fully in memory, so the window is a slice
            src_array = raster[yoff:yoff + ysize, xoff:xoff + xsize]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', xsize, ysize, 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1], options=rasterize_options)
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            feature_stats = {}
            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            # optional
            if 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(
                    np.median(masked.compressed()))
            if 'range' in stats:
                # reuse min/max when they were already computed above
                if 'min' in feature_stats:
                    rmin = feature_stats['min']
                else:
                    rmin = float(masked.min())
                if 'max' in feature_stats:
                    rmax = feature_stats['max']
                else:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if copy_properties and 'properties' in feat:
            # 'properties' may be None; treat that as "nothing to copy"
            feature_stats.update(feat['properties'] or {})

        results.append(feature_stats)
        entity_images.append(img)

    return results, entity_images
def mask_from_geom(geom, rds, band_num=1, epsg=32760, nodata_value=None,
                   all_touched=False, full_extent=False):
    """
    Build a boolean mask that hides everything outside of `geom`.

    Parameters
    ----------
    geom : shapely.geometry
        Cells inside this geometry will be `False`. Cells outside will be
        `True`
    rds : RasterDS
        The raster dataset to create a mask for.
    band_num : int
        Band of `rds.gdal_ds` whose nodata value is read (or written, see
        `nodata_value`).
    epsg : int
        EPSG code imported into the spatial reference of the temporary
        in-memory vector layer.
    nodata_value : number, optional
        When given, it is converted to float and written to the band as its
        nodata value (a side effect on `rds`). It does not affect the mask.
    all_touched : bool
        Passed to `gdal.RasterizeLayer` as the `ALL_TOUCHED` option.
    full_extent : bool
        If `True`, return a mask that's the full extent of `rds`. If `False`
        (default), return a mask that's the extent of `geom`.

    Returns
    -------
    numpy boolean array or None
        An array with `True` outside `geom` and `False` inside. `None` when
        the computed pixel window has a non-positive width or height.
    """
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg)
    vector_driver = ogr.GetDriverByName('Memory')
    raster_driver = gdal.GetDriverByName('MEM')

    band = rds.gdal_ds.GetRasterBand(band_num)
    geotransform = rds.gdal_ds.GetGeoTransform()
    raster_size = (rds.gdal_ds.RasterXSize, rds.gdal_ds.RasterYSize)

    if nodata_value is None:
        nodata_value = band.GetNoDataValue()
    else:
        nodata_value = float(nodata_value)
        band.SetNoDataValue(nodata_value)

    # Point and MultiPoint geometries are not converted to cell-sized boxes
    # here; they are handed to the rasterizer as they are.
    layer_geom_type = shapely_to_ogr_type(geom.type)

    bounds_source = rds.raster_extent if full_extent else geom
    bounds = list(bounds_source.bounds)

    # Pixel window of the mask within the source raster.
    window = bbox_to_pixel_offsets(geotransform, bounds, raster_size)
    col_off, row_off, n_cols, n_rows = window

    if n_cols <= 0 or n_rows <= 0:
        # Completely off the raster: there is nothing to rasterize.
        return None

    # Geotransform of the window: same cell size, shifted origin.
    window_gt = (
        geotransform[0] + (col_off * geotransform[1]),
        geotransform[1],
        0.0,
        geotransform[3] + (row_off * geotransform[5]),
        0.0,
        geotransform[5],
    )

    # Temporary in-memory vector layer that holds just this geometry.
    vector_ds = vector_driver.CreateDataSource('out')
    layer = vector_ds.CreateLayer('out', srs, layer_geom_type)
    feature = ogr.Feature(feature_def=layer.GetLayerDefn())
    feature.SetGeometryDirectly(ogr.CreateGeometryFromWkt(geom.wkt))
    layer.CreateFeature(feature)

    # Burn the geometry as 1s into a byte raster the size of the window.
    burned_ds = raster_driver.Create('rvds', n_cols, n_rows, 1,
                                     gdal.GDT_Byte)
    burned_ds.SetGeoTransform(window_gt)
    touch_option = 'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False'
    gdal.RasterizeLayer(burned_ds, [1], layer, None, None,
                        burn_values=[1], options=[touch_option])
    burned = burned_ds.ReadAsArray()

    # Burned cells are inside the geometry; invert so outside is True.
    return np.logical_not(burned.astype('bool'))
def zonal_stats(vectors, raster, layer_num=0, band_num=1, func=None,
                nodata_value=None, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):
    """
    Compute per-feature statistics over an in-memory raster array.

    Every feature is rasterized in memory onto the window of `raster`
    covered by its (raster-clipped) bounding box. Statistics are computed
    over the cells that are burned by the feature and are not equal to
    `nodata_value`.

    Parameters
    ----------
    vectors : object
        Vector source; handed to `get_features` together with `layer_num`.
    raster : numpy.ndarray
        Array whose first two axes are (rows, columns).
    layer_num : int
        Layer index forwarded to `get_features`.
    band_num : int
        Unused here; kept for interface compatibility.
    func : callable, optional
        Custom statistic. When supplied it is called with the masked array
        of each overlapping feature and stored under `func.__name__`.
    nodata_value : number, optional
        Cells equal to this value are masked out.
    categorical : bool
        When true and `stats` is empty, no default statistics are used.
    stats : list of str or str, optional
        Names of the statistics to compute ('min', 'max', 'mean', 'count',
        'std', 'sum', 'median', 'range', 'func'). A whitespace separated
        string is accepted. Defaults to count/min/max/mean/std unless
        `categorical` is set. The caller's list is never modified.
    copy_properties : bool
        Copy the feature's 'properties' into its result dict.
    all_touched : bool
        Passed to the rasterizer as the `ALL_TOUCHED` option.
    transform : sequence
        Geotransform of `raster`. Required.

    Returns
    -------
    results : list of dict
        One dict per feature with the requested statistics plus '__fid__'
        (the enumeration index). Features whose pixel window is empty get
        None for every requested statistic.
    entity_images : list of dict
        One `{'__fid__': i, 'img': masked_array_or_None}` per feature.

    Raises
    ------
    ValueError
        If 'func' is requested in `stats` but no `func` was given.
    Exception
        If `transform` is not provided.
    """
    # Resolve the statistics on a private copy so that a list passed in by
    # the caller is never mutated, and so that `stats` is always iterable
    # (it used to stay None for categorical=True without explicit stats).
    if isinstance(stats, str):
        stats = stats.split()
    elif stats:
        stats = list(stats)
    elif categorical:
        stats = []
    else:
        stats = ['count', 'min', 'max', 'mean', 'std']
    if func is not None and 'func' not in stats:
        stats.append('func')
    if 'func' in stats and func is None:
        raise ValueError("The 'func' stat requires the 'func' kwarg")

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform

    rsize = (raster.shape[1], raster.shape[0])
    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, _, spatial_ref = get_features(vectors, layer_num)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Half a cell width (rgt[1]); used to turn points into cell-sized boxes.
    buff = rgt[1] / 2.0
    rasterize_options = [
        'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False']

    results = []
    entity_images = []
    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        if geom.type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping
        # polys
        minx, miny, maxx, maxy = geom.bounds
        geom_bounds = [
            rbounds[0] if minx < rbounds[0] else minx,
            rbounds[1] if miny < rbounds[1] else miny,
            rbounds[2] if maxx > rbounds[2] else maxx,
            rbounds[3] if maxy > rbounds[3] else maxy,
        ]

        # NOTE(review): other call sites in this file pass the raster size
        # as a third argument to bbox_to_pixel_offsets -- confirm which
        # signature is current.
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        xoff, yoff, xsize, ysize = src_offset

        if xsize <= 0 or ysize <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = {s: None for s in stats}
            img = {'__fid__': i, 'img': None}
        else:
            # geotransform of the feature's window
            new_gt = (
                rgt[0] + (xoff * rgt[1]),
                rgt[1],
                0.0,
                rgt[3] + (yoff * rgt[5]),
                0.0,
                rgt[5],
            )
            # `raster` is already fully in memory, so the window is a slice
            src_array = raster[yoff:yoff + ysize, xoff:xoff + xsize]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', xsize, ysize, 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1], options=rasterize_options)
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            feature_stats = {}
            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            # optional
            if 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(
                    np.median(masked.compressed()))
            if 'range' in stats:
                # reuse min/max when they were already computed above
                if 'min' in feature_stats:
                    rmin = feature_stats['min']
                else:
                    rmin = float(masked.min())
                if 'max' in feature_stats:
                    rmax = feature_stats['max']
                else:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if copy_properties and 'properties' in feat:
            # 'properties' may be None; treat that as "nothing to copy"
            feature_stats.update(feat['properties'] or {})

        results.append(feature_stats)
        entity_images.append(img)

    return results, entity_images
def mask_from_geom(geom, rds, band_num=1, epsg=32760, nodata_value=None,
                   all_touched=False, full_extent=False):
    """
    Build a boolean mask that hides everything outside of `geom`.

    Parameters
    ----------
    geom : shapely.geometry
        Cells inside this geometry will be `False`. Cells outside will be
        `True`
    rds : RasterDS
        The raster dataset to create a mask for.
    band_num : int
        Band of `rds.gdal_ds` whose nodata value is read (or written, see
        `nodata_value`).
    epsg : int
        EPSG code imported into the spatial reference of the temporary
        in-memory vector layer.
    nodata_value : number, optional
        When given, it is converted to float and written to the band as its
        nodata value (a side effect on `rds`). It does not affect the mask.
    all_touched : bool
        Passed to `gdal.RasterizeLayer` as the `ALL_TOUCHED` option.
    full_extent : bool
        If `True`, return a mask that's the full extent of `rds`. If `False`
        (default), return a mask that's the extent of `geom`.

    Returns
    -------
    numpy boolean array or None
        An array with `True` outside `geom` and `False` inside. `None` when
        the computed pixel window has a non-positive width or height.
    """
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg)
    vector_driver = ogr.GetDriverByName('Memory')
    raster_driver = gdal.GetDriverByName('MEM')

    band = rds.gdal_ds.GetRasterBand(band_num)
    geotransform = rds.gdal_ds.GetGeoTransform()
    raster_size = (rds.gdal_ds.RasterXSize, rds.gdal_ds.RasterYSize)

    if nodata_value is None:
        nodata_value = band.GetNoDataValue()
    else:
        nodata_value = float(nodata_value)
        band.SetNoDataValue(nodata_value)

    # Point and MultiPoint geometries are not converted to cell-sized boxes
    # here; they are handed to the rasterizer as they are.
    layer_geom_type = shapely_to_ogr_type(geom.type)

    bounds_source = rds.raster_extent if full_extent else geom
    bounds = list(bounds_source.bounds)

    # Pixel window of the mask within the source raster.
    window = bbox_to_pixel_offsets(geotransform, bounds, raster_size)
    col_off, row_off, n_cols, n_rows = window

    if n_cols <= 0 or n_rows <= 0:
        # Completely off the raster: there is nothing to rasterize.
        return None

    # Geotransform of the window: same cell size, shifted origin.
    window_gt = (
        geotransform[0] + (col_off * geotransform[1]),
        geotransform[1],
        0.0,
        geotransform[3] + (row_off * geotransform[5]),
        0.0,
        geotransform[5],
    )

    # Temporary in-memory vector layer that holds just this geometry.
    vector_ds = vector_driver.CreateDataSource('out')
    layer = vector_ds.CreateLayer('out', srs, layer_geom_type)
    feature = ogr.Feature(feature_def=layer.GetLayerDefn())
    feature.SetGeometryDirectly(ogr.CreateGeometryFromWkt(geom.wkt))
    layer.CreateFeature(feature)

    # Burn the geometry as 1s into a byte raster the size of the window.
    burned_ds = raster_driver.Create('rvds', n_cols, n_rows, 1,
                                     gdal.GDT_Byte)
    burned_ds.SetGeoTransform(window_gt)
    touch_option = 'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False'
    gdal.RasterizeLayer(burned_ds, [1], layer, None, None,
                        burn_values=[1], options=[touch_option])
    burned = burned_ds.ReadAsArray()

    # Burned cells are inside the geometry; invert so outside is True.
    return np.logical_not(burned.astype('bool'))