Example #1
0
def zonalStats(inShp,
               inRaster,
               bandNum=1,
               mask_A=None,
               reProj=False,
               minVal='',
               maxVal='',
               verbose=False,
               rastType='N',
               unqVals=[],
               weighted=False,
               allTouched=False):
    ''' Run zonal statistics against an input shapefile. Returns array of SUM, MIN, MAX, and MEAN

    INPUT VARIABLES
    inShp [string or geopandas object] - path to input shapefile
    inRaster [string or rasterio object] - path to input raster

    OPTIONAL
    bandNum [integer] - band in raster to analyze
    mask_A [numpy boolean mask] - mask the desired band using an identical shape boolean mask. Useful for doing
        conditional zonal stats. The mask is passed to curRaster.write_mask and the band is then read masked
    reProj [boolean] -  whether to reproject data to match, if not, raise an error
    minVal/maxVal [number] - if defined, will only calculate statistics on values above or below this number.
        The empty string means "not defined"
    verbose [boolean] - whether to be loud with technical updates
    rastType [string N or C] - N is numeric and C is categorical. Categorical returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics, tabulates all these numbers, will report 0 counts
    weighted [boolean] - apply weighted zonal calculations. This will determine the % overlap for each
        cell in the defined AOI. Will apply weights in calculations of numerical statistics.
        Forces allTouched to True
    allTouched [boolean] - forwarded to rasterize as all_touched

    RETURNS
    array of arrays, one for each feature in inShp, in feature order:
        numeric - [SUM, MIN, MAX, MEAN]
        categorical with unqVals - one count per entry of unqVals, counting only cells inside the feature
        categorical without unqVals - the result of np.unique(..., return_counts=True)
    A feature that cannot be processed yields [-1, -1, -1, -1] (numeric) or one -1 per entry
    of unqVals (categorical) so that the output always lines up with the input features.

    RAISES
    ValueError - if the vector and raster CRS differ and reProj is False
    '''
    if isinstance(inShp, str):
        inVector = gpd.read_file(inShp)
    else:
        inVector = inShp
    # Only a dataset opened here is closed on exit; a caller-supplied
    # dataset stays open because the caller owns it.
    openedHere = isinstance(inRaster, str)
    if openedHere:
        curRaster = rasterio.open(inRaster, 'r')
    else:
        curRaster = inRaster

    try:
        # If mask is not none, apply mask
        # NOTE(review): a path is opened read-only above; write_mask probably
        # needs a writable dataset, so mask_A likely only works with a
        # caller-supplied dataset - confirm.
        if mask_A is not None:
            curRaster.write_mask(mask_A)

        outputData = []
        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")

        # Weighted statistics need every partially covered cell in the zone
        if weighted:
            allTouched = True

        # Placeholder row for features that fail; copied on every use so
        # rows in the output never share one list object.
        if rastType == 'N':
            failed = [-1, -1, -1, -1]
        else:
            failed = [-1 for _ in unqVals]

        tCount = len(inVector['geometry'])
        #generate bounding box geometry for raster bbox
        b = curRaster.bounds
        rBox = box(b[0], b[1], b[2], b[3])
        t = curRaster.transform
        for fCount, geometry in enumerate(inVector['geometry'], start=1):
            try:
                #This test is used in case the geometry extends beyond the edge of the raster
                #   I think it is computationally heavy, but I don't know of an easier way to do it
                if not rBox.contains(geometry):
                    geometry = geometry.intersection(rBox)
            except Exception:
                print("Error processing %s" % fCount)
                # Still emit a row so later features keep their position
                outputData.append(list(failed))
                continue

            try:
                if fCount % 1000 == 0 and verbose:
                    tPrint("Processing %s of %s" % (fCount, tCount))
                # get pixel coordinates of the geometry's bounding box
                ul = curRaster.index(*geometry.bounds[0:2])
                lr = curRaster.index(*geometry.bounds[2:4])
                # read the subset of the data into a numpy array
                # (rows run from lr[0] to ul[0], columns from ul[1] to lr[1])
                window = ((float(lr[0]), float(ul[0] + 1)),
                          (float(ul[1]), float(lr[1] + 1)))
                data = curRaster.read(bandNum,
                                      window=window,
                                      masked=mask_A is not None)

                if weighted:
                    #Create a grid of the input raster (data)
                    # presumably a GeoDataFrame with one polygon per cell and
                    # 'row'/'col' index columns - verify against polygonizeArray
                    rGrid = polygonizeArray(data, geometry.bounds, curRaster)
                    #Clip the grid by the input geometry
                    rGrid['gArea'] = rGrid.area
                    rGrid['newArea'] = rGrid.intersection(geometry).area
                    #Store the percent overlap
                    rGrid['w'] = rGrid['newArea'] / rGrid['gArea']
                    # Fractional weights would be truncated if written back
                    # into an integer array, so promote to float first.
                    if not np.issubdtype(data.dtype, np.floating):
                        data = data.astype(float)
                    for cellRow, cellCol, cellW in zip(rGrid['row'],
                                                       rGrid['col'],
                                                       rGrid['w']):
                        data[cellRow, cellCol] = data[cellRow, cellCol] * cellW

                # create an affine transform for the subset data: same cell
                # size, origin moved to the window's first row and column
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e,
                                        t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the feature, 1 elsewhere,
                # so the result can be used directly as a "hide this" mask
                mask = rasterize([(geometry, 0)],
                                 out_shape=data.shape,
                                 transform=shifted_affine,
                                 fill=1,
                                 all_touched=allTouched,
                                 dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))
                if rastType == 'N':
                    thresholded = minVal != '' or maxVal != ''
                    if minVal != '':
                        masked_data = np.ma.masked_where(
                            masked_data < minVal, masked_data)
                    if maxVal != '':
                        masked_data = np.ma.masked_where(
                            masked_data > maxVal, masked_data)
                    if thresholded and masked_data.count() == 0:
                        # the thresholds removed every cell of the zone
                        results = [-1, -1, -1, -1]
                    else:
                        results = [
                            np.nansum(masked_data),
                            np.nanmin(masked_data),
                            np.nanmax(masked_data),
                            np.nanmean(masked_data)
                        ]
                elif rastType == 'C':
                    if len(unqVals) > 0:
                        # Count only the cells inside the feature; the raw
                        # window also holds the cells around it.
                        xx = Counter(masked_data.compressed().tolist())
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                else:
                    # handled below like any other per-feature failure
                    raise ValueError("rastType must be 'N' or 'C'")
                outputData.append(results)
            except Exception as e:
                if verbose:
                    print(e)
                outputData.append(list(failed))
        return outputData
    finally:
        if openedHere:
            curRaster.close()
Example #2
0
def zonalStats(inShp,
               inRaster,
               bandNum=1,
               reProj=False,
               minVal='',
               verbose=False,
               rastType='N',
               unqVals=[]):
    ''' Run zonal statistics against an input shapefile
    
    INPUT VARIABLES
    inShp [string] - path to input shapefile
    inRaster [string] - path to input raster
    
    OPTIONAL
    bandNum [integer] - band in raster to analyze
    reProj [boolean] -  whether to reproject data to match, if not, raise an error
    minVal [number] - if defined, will only calculate statistics on values above this number.
        The empty string means "not defined"
    verbose [boolean] - whether to be loud with responses
    rastType [string N or C] - N is numeric and C is categorical. Categorical returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics, tabulates all these numbers, will report 0 counts
    
    RETURNS
    array of arrays, one for each feature in inShp, in feature order:
        numeric - [SUM, MIN, MAX, MEAN]
        categorical with unqVals - one count per entry of unqVals, counting only cells inside the feature
        categorical without unqVals - the result of np.unique(..., return_counts=True)
    A feature that cannot be processed yields [-1, -1, -1, -1] (numeric) or one -1 per entry
    of unqVals (categorical).

    RAISES
    ValueError - if the vector and raster CRS differ and reProj is False
    '''

    outputData = []
    with rasterio.open(inRaster, 'r') as curRaster:
        inVector = gpd.read_file(inShp)
        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")
        tCount = len(inVector['geometry'])
        t = curRaster.transform
        for fCount, geometry in enumerate(inVector['geometry'], start=1):
            if fCount % 1000 == 0 and verbose:
                tPrint("Processing %s of %s" % (fCount, tCount))
            try:
                # get pixel coordinates of the geometry's bounding box
                ul = curRaster.index(*geometry.bounds[0:2])
                lr = curRaster.index(*geometry.bounds[2:4])

                # read the subset of the data into a numpy array
                # (rows run from lr[0] to ul[0], columns from ul[1] to lr[1])
                window = ((float(lr[0]), float(ul[0] + 1)),
                          (float(ul[1]), float(lr[1] + 1)))
                data = curRaster.read(bandNum, window=window)
                # create an affine transform for the subset data: same cell
                # size, origin moved to the window's first row and column
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e,
                                        t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the feature, 1 elsewhere,
                # so the result can be used directly as a "hide this" mask
                mask = rasterize([(geometry, 0)],
                                 out_shape=data.shape,
                                 transform=shifted_affine,
                                 fill=1,
                                 all_touched=True,
                                 dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))
                if rastType == 'N':
                    if minVal != '':
                        masked_data = np.ma.masked_where(
                            masked_data < minVal, masked_data)
                    if minVal != '' and masked_data.count() == 0:
                        # the threshold removed every cell of the zone
                        results = [-1, -1, -1, -1]
                    else:
                        results = [
                            masked_data.sum(),
                            masked_data.min(),
                            masked_data.max(),
                            masked_data.mean()
                        ]
                elif rastType == 'C':
                    if len(unqVals) > 0:
                        # Count only the cells inside the feature; the raw
                        # window also holds the cells around it.
                        xx = Counter(masked_data.compressed().tolist())
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                else:
                    # handled below like any other per-feature failure
                    raise ValueError("rastType must be 'N' or 'C'")
                outputData.append(results)
            except Exception as e:
                print(e)
                # Keep one row per feature, shaped like a successful row
                if rastType == 'N':
                    outputData.append([-1, -1, -1, -1])
                else:
                    outputData.append([-1 for _ in unqVals])
    return outputData