def zonalStats(inShp, inRaster, bandNum=1, mask_A=None, reProj=False, minVal='', maxVal='',
               verbose=False, rastType='N', unqVals=None, weighted=False, allTouched=False):
    ''' Run zonal statistics against an input shapefile.

    For every feature the raster is read over the feature's bounding-box
    window, cells outside the feature are masked out, and either numeric
    statistics or categorical counts are computed on what remains.

    INPUT VARIABLES
    inShp [string or geopandas object] - path to input shapefile, or an
        already loaded geopandas object
    inRaster [string or rasterio object] - path to input raster, or an already
        opened rasterio dataset. A dataset opened here from a path is closed
        before returning; a dataset passed in by the caller is left open.

    OPTIONAL
    bandNum [integer] - band in raster to analyze
    mask_A [numpy boolean mask] - if given, it is written to the raster with
        write_mask() and the band is then read as a masked array. Useful for
        doing conditional zonal stats
    reProj [boolean] - whether to reproject the vector data to the raster CRS
        when they differ; if False, a mismatch raises an error
    minVal/maxVal [number] - if defined, cells below minVal / above maxVal are
        excluded from the numeric statistics. '' means "not defined"
    verbose [boolean] - whether to be loud with technical updates
    rastType [string N or C] - N is numeric and C is categorical. Categorical
        returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics,
        tabulates all these numbers, will report 0 counts
    weighted [boolean] - apply weighted zonal calculations. Each cell value is
        multiplied by the fraction of the cell's area that overlaps the
        feature before the statistics are computed. Forces allTouched to True
    allTouched [boolean] - passed to rasterize() as all_touched

    RETURNS
    list with one entry per feature in inShp, in feature order:
        rastType 'N'                  - [SUM, MIN, MAX, MEAN]
        rastType 'C' with unqVals     - one count per value in unqVals
        rastType 'C' without unqVals  - result of np.unique(..., return_counts=True)
    A feature that cannot be processed yields a placeholder instead:
    [-1, -1, -1, -1] for 'N', otherwise one -1 per value in unqVals.

    RAISES
    ValueError - if the CRS of the inputs differ and reProj is False
    '''
    # NOTE(review): SOURCE contains a second definition named zonalStats; if
    # both live in the same module the later one shadows the earlier - confirm
    # which one callers are meant to get.

    # None replaces the former mutable default ([]); behaviour is unchanged.
    if unqVals is None:
        unqVals = []

    if isinstance(inShp, str):
        inVector = gpd.read_file(inShp)
    else:
        inVector = inShp

    # Remember whether the dataset was opened here so that only then is it closed.
    openedHere = isinstance(inRaster, str)
    if openedHere:
        curRaster = rasterio.open(inRaster, 'r')
    else:
        curRaster = inRaster

    # Placeholder appended for features that cannot be processed.
    if rastType == 'N':
        failedResult = [-1, -1, -1, -1]
    else:
        failedResult = [-1 for x in unqVals]

    outputData = []
    try:
        # If mask is not none, apply mask
        if mask_A is not None:
            curRaster.write_mask(mask_A)

        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")

        tCount = len(inVector['geometry'])

        # generate bounding box geometry for raster bbox
        b = curRaster.bounds
        rBox = box(b[0], b[1], b[2], b[3])

        for fCount, geometry in enumerate(inVector['geometry'], start=1):
            # Clip geometries that extend beyond the edge of the raster so the
            # pixel window computed below stays inside the raster.
            try:
                if not rBox.contains(geometry):
                    geometry = geometry.intersection(rBox)
            except Exception:
                # Previously a bare except that also dropped the feature from
                # the output; append a placeholder so results stay aligned
                # with the input features.
                print("Error processing %s" % fCount)
                outputData.append(list(failedResult))
                continue

            try:
                if fCount % 1000 == 0 and verbose:
                    tPrint("Processing %s of %s" % (fCount, tCount))

                # get pixel coordinates of the geometry's bounding box
                ul = curRaster.index(*geometry.bounds[0:2])
                lr = curRaster.index(*geometry.bounds[2:4])

                # read the subset of the data into a numpy array
                window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]), float(lr[1] + 1)))
                data = curRaster.read(bandNum, window=window, masked=mask_A is not None)

                if weighted:
                    allTouched = True
                    # Create a grid of the input raster (data)
                    rGrid = polygonizeArray(data, geometry.bounds, curRaster)
                    # Clip the grid by the input geometry
                    rGrid['gArea'] = rGrid.area
                    rGrid['newArea'] = rGrid.intersection(geometry).area
                    # Store the percent overlap
                    rGrid['w'] = rGrid['newArea'] / rGrid['gArea']
                    # Work in floating point: writing fractional values back
                    # into an integer array would silently truncate them.
                    data = data.astype(float)
                    for cellRow, cellCol, cellWeight in zip(rGrid['row'], rGrid['col'], rGrid['w']):
                        data[cellRow, cellCol] = data[cellRow, cellCol] * cellWeight

                # create an affine transform for the subset data
                t = curRaster.transform
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e, t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the feature, 1 (masked) outside
                mask = rasterize(
                    [(geometry, 0)],
                    out_shape=data.shape,
                    transform=shifted_affine,
                    fill=1,
                    all_touched=allTouched,
                    dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))

                if rastType == 'N':
                    thresholded = minVal != '' or maxVal != ''
                    if minVal != '':
                        masked_data = np.ma.masked_where(masked_data < minVal, masked_data)
                    if maxVal != '':
                        masked_data = np.ma.masked_where(masked_data > maxVal, masked_data)
                    # The empty-selection check only applies when a threshold was given.
                    if thresholded and masked_data.count() == 0:
                        results = [-1, -1, -1, -1]
                    else:
                        results = [
                            np.nansum(masked_data),
                            np.nanmin(masked_data),
                            np.nanmax(masked_data),
                            np.nanmean(masked_data),
                        ]
                if rastType == 'C':
                    if len(unqVals) > 0:
                        # NOTE(review): counts are taken over the whole window
                        # (data), not only the cells inside the feature
                        # (masked_data) - confirm this is intended.
                        xx = dict(Counter(data.flatten()))
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                outputData.append(results)
            except Exception as e:
                if verbose:
                    print(e)
                outputData.append(list(failedResult))
    finally:
        if openedHere:
            curRaster.close()
    return outputData
def zonalStats(inShp, inRaster, bandNum=1, reProj=False, minVal='', verbose=False, rastType='N', unqVals=None):
    ''' Run zonal statistics against an input shapefile.

    For every feature the raster is read over the feature's bounding-box
    window, cells outside the feature are masked out (cells touched by the
    feature are kept), and either numeric statistics or categorical counts
    are computed.

    INPUT VARIABLES
    inShp [string] - path to input shapefile
    inRaster [string] - path to input raster

    OPTIONAL
    bandNum [integer] - band in raster to analyze
    reProj [boolean] - whether to reproject the vector data to the raster CRS
        when they differ; if False, a mismatch raises an error
    minVal [number] - if defined, will only calculate statistics on values
        at or above this number. '' means "not defined"
    verbose [boolean] - whether to print progress every 1000 features
    rastType [string N or C] - N is numeric and C is categorical. Categorical
        returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics,
        tabulates all these numbers, will report 0 counts

    RETURNS
    list with one entry per feature in inShp, in feature order:
        rastType 'N'                  - [SUM, MIN, MAX, MEAN]
        rastType 'C' with unqVals     - one count per value in unqVals
        rastType 'C' without unqVals  - result of np.unique(..., return_counts=True)
    A feature that cannot be processed has its exception printed (regardless
    of verbose) and yields a placeholder: one -1 per value in unqVals for
    categorical runs with unqVals, [-1, -1, -1, -1] otherwise.

    RAISES
    ValueError - if the CRS of the inputs differ and reProj is False
    '''
    # NOTE(review): SOURCE contains another definition named zonalStats; if
    # both live in the same module the later one shadows the earlier - confirm
    # which one callers are meant to get.

    # None replaces the former mutable default ([]); behaviour is unchanged.
    if unqVals is None:
        unqVals = []

    # Placeholder for failed features. Sized like a successful row so that
    # categorical output is not ragged; the 4-element form is kept elsewhere.
    if rastType == 'C' and len(unqVals) > 0:
        failedResult = [-1] * len(unqVals)
    else:
        failedResult = [-1, -1, -1, -1]

    outputData = []
    with rasterio.open(inRaster, 'r') as curRaster:
        inVector = gpd.read_file(inShp)
        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")

        tCount = len(inVector['geometry'])
        for fCount, geometry in enumerate(inVector['geometry'], start=1):
            if fCount % 1000 == 0 and verbose:
                tPrint("Processing %s of %s" % (fCount, tCount))
            try:
                # The window computation is inside the try so that a single
                # unusable geometry yields a placeholder instead of aborting
                # the whole run.
                # get pixel coordinates of the geometry's bounding box
                ul = curRaster.index(*geometry.bounds[0:2])
                lr = curRaster.index(*geometry.bounds[2:4])

                # read the subset of the data into a numpy array
                window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]), float(lr[1] + 1)))
                data = curRaster.read(bandNum, window=window)

                # create an affine transform for the subset data
                t = curRaster.transform
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e, t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the feature, 1 (masked) outside
                mask = rasterize(
                    [(geometry, 0)],
                    out_shape=data.shape,
                    transform=shifted_affine,
                    fill=1,
                    all_touched=True,
                    dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))

                if rastType == 'N':
                    thresholded = minVal != ''
                    if thresholded:
                        masked_data = np.ma.masked_where(masked_data < minVal, masked_data)
                    # The empty-selection check only applies when a threshold was given.
                    if thresholded and masked_data.count() == 0:
                        results = [-1, -1, -1, -1]
                    else:
                        results = [
                            masked_data.sum(),
                            masked_data.min(),
                            masked_data.max(),
                            masked_data.mean(),
                        ]
                if rastType == 'C':
                    if len(unqVals) > 0:
                        # NOTE(review): counts are taken over the whole window
                        # (data), not only the cells inside the feature
                        # (masked_data) - confirm this is intended.
                        xx = dict(Counter(data.flatten()))
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                outputData.append(results)
            except Exception as e:
                print(e)
                outputData.append(list(failedResult))
    return outputData