def extractRasterArray(rasters, paramstats, vector, vectorgeomtype, fid, gdalpath="", gdalcachemax="9000", systemcall=True, path=""):
    """Clip rasters on one vector feature and store them in ndarrays

    Parameters
    ----------
    rasters : list
        list of rasters to analyse
    paramstats : dict
        list of statistics to compute (e.g. {1:'stats', 2:'rate'})
    vector : string
        vector file for cutline operation
    vectorgeomtype : int
        Type of geometry of input/output vector (http://portal.opengeospatial.org/files/?artifact_id=25355)
    fid : integer
        FID value to clip raster (cwhere parameter of gdalwarp)
    gdalpath : string
        gdal binaries path
    gdalcachemax : string
        gdal cache for warping operation (in Mb)
    systemcall : boolean
        if True, use os system call to execute gdalwarp (useful to control gdal binaries version - gdalpath parameter)
    path : string
        temporary path to store temporary data if systemcall is True

    Return
    ----------
    boolean
        if True, warp operation well terminated
    ndarray
        value returned by storeRasterInArray for the collected rasters,
        or None when the operation failed

    Raises
    ------
    Exception
        if pixel value extraction ('val') is requested on a non Point vector
    """
    bands = []
    todel = []
    success = True
    # Bound up front so that every exit path returns a defined value
    ndbands = None

    # Get rasters resolution
    res = abs(fut.getRasterResolution(rasters[0])[0])
    print(fid)

    # Get vector name
    vectorname = os.path.splitext(os.path.basename(vector))[0]

    # Value extraction is only available for Point geometries
    valextraction = 'val' in paramstats.values()
    if valextraction and vectorgeomtype not in (1, 4, 1001, 1004):
        raise Exception("Type of input vector %s must be "
                        "'Point' for pixel value extraction" % (vector))

    try:
        for idx, raster in enumerate(rasters):
            # Value extraction : the input raster is used as is
            if valextraction:
                bands.append(raster)

            # Stats extraction : gdalwarp binary
            # TODO : test gdal version : 2.2.4 or later
            elif systemcall:
                tmpfile = os.path.join(
                    path, 'rast_%s_%s_%s' % (vectorname, str(fid), idx))
                cmd = ('%sgdalwarp -tr %s %s -tap -q -overwrite -cutline %s '
                       '-crop_to_cutline --config GDAL_CACHEMAX %s -wm %s '
                       '-wo "NUM_THREADS=ALL_CPUS" -wo "CUTLINE_ALL_TOUCHED=YES" '
                       '-cwhere "FID=%s" %s %s -ot Float32') % (
                           os.path.join(gdalpath, ''), res, res, vector,
                           gdalcachemax, gdalcachemax, fid, raster, tmpfile)
                # Registered before the call so that a partially written
                # file is cleaned up as well
                todel.append(tmpfile)
                Utils.run(cmd)
                bands.append(tmpfile)

            # Stats extraction : gdal python API, in-memory result
            else:
                gdal.SetConfigOption("GDAL_CACHEMAX", gdalcachemax)
                # warpOptions must be a flat list of "NAME=VALUE" strings,
                # each one is forwarded as a '-wo' argument
                warped = gdal.Warp('', raster, xRes=res,
                                   yRes=res, targetAlignedPixels=True,
                                   cutlineDSName=vector, cropToCutline=True,
                                   cutlineWhere="FID=%s" % (fid), format='MEM',
                                   warpMemoryLimit=gdalcachemax,
                                   warpOptions=["NUM_THREADS=ALL_CPUS",
                                                "CUTLINE_ALL_TOUCHED=YES"])
                if warped is None:
                    # gdal.Warp returns None instead of raising when GDAL
                    # exceptions are not enabled
                    raise RuntimeError("gdal.Warp failed for feature %s" % (fid))
                bands.append(warped)

        # store rasters in ndarray (once, when every raster is available)
        # NOTE(review): on the 'val' path the unclipped input rasters are
        # handed to storeRasterInArray - confirm this is the expected input.
        ndbands = storeRasterInArray(bands)

    except Exception:
        # Best effort : the failure is reported through the returned flag
        success = False
        ndbands = None

    finally:
        # Remove tmp rasters written on disk by the system call
        for filtodel in todel:
            if os.path.exists(filtodel):
                os.remove(filtodel)

    return success, ndbands
def zonalstats(path, rasters, params, output, paramstats, classes="", bufferDist=None, gdalpath="", write_ouput=False, gdalcachemax="9000"):
    """Compute zonal statistics (descriptive and categorical)
       on multi-band raster or multi-rasters
       based on Point (buffered or not) or Polygon zonal vector

    Parameters
    ----------
    path : string
        working directory
    rasters : list
        list of rasters to analyse
    params : list
        vector file and list of FID to process (all features if the list is empty)
    output : vector file (sqlite, shapefile and geojson)
        vector file to store statistics
    paramstats : list or dict
        statistics to compute, by band / raster number
        (e.g. {1:'stats', 2:'rate'} or ['1:stats', '2:rate'])
            - stats : mean_b, std_b, max_b, min_b
            - statsmaj : meanmaj, stdmaj, maxmaj, minmaj of majority class
            - rate : rate of each pixel value (classe names)
            - stats_cl : mean_cl, std_cl, max_cl, min_cl of one class
            - val : value of corresponding pixel (only for Point geometry and without other stats)
    classes : nomenclature file
        nomenclature
    bufferDist : int
        in case of point zonal vector : buffer size
    gdalpath : string
        path of gdal binaries (for system execution)
    write_ouput : boolean
        if True, warped rasters are written in the working directory with the
        gdalwarp binary (and removed once the statistics are computed),
        otherwise they are kept in memory
    gdalcachemax : string
        gdal cache for warping operation (in Mb)

    Raises
    ------
    Exception
        if the output format, the geometry type or the requested statistics
        are not handled
    """
    pointtypes = (1, 4, 1001, 1004)
    polygontypes = (3, 6, 1003, 1006)
    # Geometry names of the output schema ; Z geometries use the '3D' prefix
    geomnames = {1: 'Point', 4: 'MultiPoint',
                 3: 'Polygon', 6: 'MultiPolygon',
                 1001: '3D Point', 1004: '3D MultiPoint',
                 1003: '3D Polygon', 1006: '3D MultiPolygon'}

    # Output driver, checked first so that an unsupported format fails
    # before any computation
    # TODO Export format depending on columns number (shapefile, sqlite, geojson)
    convert = False
    outformat = os.path.splitext(output)[1]
    if outformat == ".shp":
        driver = "ESRI Shapefile"
    elif outformat == ".geojson":
        driver = "GeoJSON"
    elif outformat == ".sqlite":
        # SQLite output is converted from an intermediate shapefile
        driver = "ESRI Shapefile"
        convert = True
    else:
        raise Exception("The output format '%s' is not handled" % (outformat[1:]))

    # Features and vector file to intersect
    vector, idvals = params

    # Raster resolution
    # TODO : Check if all rasters have same extent and resolution
    res = abs(fut.getRasterResolution(rasters[0])[0])

    # if no vector subsetting (all features)
    if not idvals:
        idvals = getFidList(vector)

    # vector open and iterate features and/or buffer geom
    vectorname = os.path.splitext(os.path.basename(vector))[0]
    vectorgeomtype = vf.getGeomType(vector)
    vectorbuff = None

    # Read statistics parameters
    if isinstance(paramstats, list):
        paramstats = dict([(x.split(':')[0], x.split(':')[1]) for x in paramstats])
    valextraction = 'val' in paramstats.values()

    # Value extraction (Point geometry without buffer)
    if not bufferDist and vectorgeomtype in pointtypes:
        if not valextraction:
            raise Exception("Only pixel value extraction available "
                            "when Point geometry without buffer distance is provided")

    # Stats extraction on buffered points
    elif vectorgeomtype in pointtypes:
        vectorbuff = vectorname + "buff.shp"
        _ = bfo.bufferPoly(vector, vectorbuff, bufferDist=bufferDist)

    # Stats extraction on polygons
    elif vectorgeomtype not in polygontypes:
        raise Exception("Geometry type of vector file not handled")

    if valextraction and vectorgeomtype not in pointtypes:
        raise Exception("Type of input vector %s must be "
                        "'Point' for pixel value extraction" % (vector))

    schema = {'geometry': geomnames[vectorgeomtype], 'properties': {}}

    # Vector reading (geometries are always read from the input vector)
    dataset = vf.openToRead(vector)
    lyr = dataset.GetLayer()
    spatialref = lyr.GetSpatialRef().ExportToProj4()

    # Cutline vector : buffered points when a buffer has been computed
    cutline = vectorbuff if vectorbuff else vector

    # gdal binaries prefix, computed once (not once per feature)
    if gdalpath != "" and gdalpath is not None:
        gdalbin = gdalpath + "/"
    else:
        gdalbin = ""

    # Prepare stats DataFrame
    stats = definePandasDf(idvals, paramstats, classes)

    # Iterate vector's features (FID)
    for idval in idvals:
        lyr.SetAttributeFilter("FID=" + str(idval))
        feat = lyr.GetNextFeature()
        geom = feat.GetGeometryRef() if feat is not None else None
        if geom is None:
            # Nothing can be clipped or sampled without a geometry
            print("No geometry found for feature %s" % (idval))
            continue

        # Insert geometry in DataFrame
        stats.update(pad.DataFrame(index=[idval],
                                   columns=["geometry"],
                                   data=[str(geom.ExportToWkt())]))

        # Get Point coordinates (pixel value case)
        if valextraction:
            xpt, ypt, _ = geom.GetPoint()

        # creation of warped rasters
        bands = []
        created = []  # rasters written on disk by this function only
        success = True
        tmpfile = None
        for idx, raster in enumerate(rasters):

            # Value extraction : the input raster is used as is
            if valextraction:
                tmpfile = raster

            # Stats Extraction
            else:
                try:
                    # TODO : test gdal version : 2.2.4 or later
                    if write_ouput:
                        tmpfile = os.path.join(
                            path, 'rast_%s_%s_%s' % (vectorname, str(idval), idx))
                        cmd = ('%sgdalwarp -tr %s %s -tap -q -overwrite -cutline %s '
                               '-crop_to_cutline --config GDAL_CACHEMAX %s -wm %s '
                               '-wo "NUM_THREADS=ALL_CPUS" -wo "CUTLINE_ALL_TOUCHED=YES" '
                               '-cwhere "FID=%s" %s %s -ot Float32') % (
                                   gdalbin, res, res, cutline, gdalcachemax,
                                   gdalcachemax, idval, raster, tmpfile)
                        created.append(tmpfile)
                        Utils.run(cmd)
                    else:
                        gdal.SetConfigOption("GDAL_CACHEMAX", gdalcachemax)
                        # warpOptions must be a flat list of "NAME=VALUE"
                        # strings, each one is forwarded as a '-wo' argument
                        tmpfile = gdal.Warp('', raster, xRes=res,
                                            yRes=res, targetAlignedPixels=True,
                                            cutlineDSName=cutline, cropToCutline=True,
                                            cutlineWhere="FID=%s" % (idval), format='MEM',
                                            warpMemoryLimit=gdalcachemax,
                                            warpOptions=["NUM_THREADS=ALL_CPUS",
                                                         "CUTLINE_ALL_TOUCHED=YES"])
                        if tmpfile is None:
                            # gdal.Warp returns None instead of raising when
                            # GDAL exceptions are not enabled
                            raise RuntimeError("gdal.Warp failed")
                except Exception:
                    # One failed raster invalidates the whole feature : band
                    # numbers would not match the content of 'bands' anymore
                    success = False
                    break

            bands.append(tmpfile)

        if success:
            # Majority class information is specific to the current feature
            classmaj = None
            posclassmaj = None

            for param in paramstats:
                bandid = int(param)

                # Multi-raster / Multi-band data preparation
                if len(rasters) != 1:
                    band = bands[bandid - 1]
                    nbband = 1
                else:
                    band = tmpfile
                    nbband = bandid

                if not band:
                    continue

                # Statistics extraction
                methodstat = paramstats[param]

                if methodstat == 'rate':
                    classStats, classmaj, posclassmaj = countPixelByClass(
                        band, idval, nbband)
                    stats.update(classStats)

                    # Add columns when pixel values are not identified in nomenclature file
                    # (concat returns a new DataFrame, it has to be kept)
                    newcols = [col for col in classStats.columns
                               if col not in stats.columns]
                    if newcols:
                        stats = pad.concat([stats, classStats[newcols]], axis=1)

                elif methodstat == 'stats':
                    cols = ["meanb%s" % (bandid), "stdb%s" % (bandid),
                            "maxb%s" % (bandid), "minb%s" % (bandid)]
                    stats.update(pad.DataFrame(data=[rasterStats(band, nbband)],
                                               index=[idval],
                                               columns=cols))

                elif methodstat == 'statsmaj':
                    if not classmaj:
                        if "rate" not in paramstats.values():
                            raise Exception("No classification raster provided "
                                            "to check position of majority class")
                        # band keys may be strings ('1:rate' form)
                        idxbdclasses = int(
                            [x for x in paramstats if paramstats[x] == "rate"][0])
                        # NOTE(review): band numbers are 0-based here but
                        # 1-based in the 'rate' branch above - confirm which
                        # one countPixelByClass expects.
                        if len(rasters) != 1:
                            bandrate = bands[idxbdclasses - 1]
                            nbbandrate = 0
                        else:
                            bandrate = band
                            nbbandrate = idxbdclasses - 1

                        _, classmaj, posclassmaj = countPixelByClass(
                            bandrate, idval, nbbandrate)

                    cols = ["meanmajb%s" % (bandid), "stdmajb%s" % (bandid),
                            "maxmajb%s" % (bandid), "minmajb%s" % (bandid)]
                    stats.update(pad.DataFrame(data=[rasterStats(band, nbband, posclassmaj)],
                                               index=[idval],
                                               columns=cols))

                elif "stats_" in methodstat:
                    if "rate" not in paramstats.values():
                        raise Exception("No classification raster provided "
                                        "to check position of requested class")

                    # get positions of class
                    cl = methodstat.split('_')[1]
                    idxbdclasses = int(
                        [x for x in paramstats if paramstats[x] == "rate"][0])
                    # One entry per raster in 'bands' : with a single raster
                    # the classification is a band of that raster
                    classraster = bands[idxbdclasses - 1] if len(rasters) != 1 else tmpfile
                    # In-memory warps are already opened datasets
                    if isinstance(classraster, str):
                        rastertmp = gdal.Open(classraster, 0)
                    else:
                        rastertmp = classraster
                    if len(rasters) != 1:
                        data = rastertmp.ReadAsArray()
                    else:
                        data = rastertmp.GetRasterBand(idxbdclasses).ReadAsArray()
                    posclass = np.where(data == int(cl))
                    data = None
                    rastertmp = None

                    cols = ["meanb%sc%s" % (bandid, cl), "stdb%sc%s" % (bandid, cl),
                            "maxb%sc%s" % (bandid, cl), "minb%sc%s" % (bandid, cl)]
                    stats.update(pad.DataFrame(data=[rasterStats(band, nbband, posclass)],
                                               index=[idval],
                                               columns=cols))

                elif "val" in methodstat:
                    colpt, rowpt = fut.geoToPix(band, xpt, ypt)
                    cols = "valb%s" % (param)
                    stats.update(pad.DataFrame(data=[rasterStats(band, nbband, None, (colpt, rowpt))],
                                               index=[idval],
                                               columns=[cols]))
                else:
                    print("The method %s is not implemented" % (paramstats[param]))

                band = None

        else:
            print(
                "gdalwarp problem for feature %s (geometry error, too small area, etc.)"
                % (idval))

        # Release datasets, then remove the rasters written by this function
        # (all of them, and never the input rasters)
        bands = None
        tmpfile = None
        for tmpraster in created:
            if os.path.exists(tmpraster):
                os.remove(tmpraster)

    # Prepare geometry and projection
    stats["geometry"] = stats["geometry"].apply(wkt.loads)
    statsfinal = gpad.GeoDataFrame(stats, geometry="geometry")
    statsfinal.fillna(0, inplace=True)
    statsfinal.crs = {'init': 'proj4:%s' % (spatialref)}

    # change column names if rate stats expected and nomenclature file is provided
    # (methods are the values of paramstats, keys are band numbers)
    if "rate" in paramstats.values() and classes != "":
        # get multi-level nomenclature
        nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
        desclasses = nomenc.HierarchicalNomenclature.get_level_values(
            nomenc.getLevelNumber() - 1)

        # rename columns with alias
        for code, _, _, alias in desclasses:
            if isinstance(alias, bytes):
                alias = alias.decode('utf8')
            statsfinal.rename(columns={str(code): str(alias)}, inplace=True)

    # change columns type
    schema['properties'] = OrderedDict([(x, 'float:10.2')
                                        for x in list(statsfinal.columns)
                                        if x != 'geometry'])

    # exportation
    if not convert:
        statsfinal.to_file(output,
                           driver=driver,
                           schema=schema,
                           encoding='utf-8')
    else:
        outputinter = os.path.splitext(output)[0] + '.shp'
        statsfinal.to_file(outputinter,
                           driver=driver,
                           schema=schema,
                           encoding='utf-8')
        output = os.path.splitext(output)[0] + '.sqlite'
        Utils.run('ogr2ogr -f SQLite %s %s' % (output, outputinter))
def checkmethodstats(rasters, paramstats, nbbands):
    """Store list of requested statistics in dict and check validity of input rasters

    Parameters
    ----------
    rasters : list
        list of rasters to analyse
    paramstats : list or dict
        statistics to compute, given as
            - a dict (e.g. {1:'stats', 2:'rate'})
            - a list of 'band:method' strings (e.g. ['1:stats', '2:rate'])
            - a list of [band, method] pairs (e.g. [[1,'stats'], [2, 'rate']])
            - a single method applied to every band (e.g. ['val'])
    nbbands : int
        number of input rasters or bands of input raster

    Return
    ----------
    paramstats : dict
        list of statistics to compute (e.g. {1:'stats', 2:'rate'})

    Raises
    ------
    Exception
        if the request is empty or malformed, if a method is not implemented,
        if a band number exceeds the number of input rasters, or if input
        rasters do not share extent and resolution
    """
    # Format requested statistics
    if isinstance(paramstats, list):
        if not paramstats:
            raise Exception("No statistics method requested")
        first = paramstats[0]

        # List of methods (bash)
        if isinstance(first, str) and ':' in first:
            paramstats = dict([(x.split(':')[0], x.split(':')[1]) for x in paramstats])

        # List of [band, method] pairs
        elif all(isinstance(x, (list, tuple)) and len(x) == 2 for x in paramstats):
            paramstats = dict((band, method) for band, method in paramstats)

        # Unique method without band / raster number
        elif len(paramstats) == 1:
            paramstats = dict((idx + 1, str(first)) for idx in range(nbbands))

        else:
            raise Exception("Requested statistics must be 'band:method' items, "
                            "[band, method] pairs or a single method")

    if not paramstats:
        raise Exception("No statistics method requested")

    # Check statistics methods validity
    for keys in paramstats:
        # NOTE(review): class-specific requests ('stats_' + class) are
        # returned as plain 'stats', the class suffix is dropped here -
        # confirm callers do not need it.
        if 'stats_' in paramstats[keys]:
            paramstats[keys] = 'stats'
        if paramstats[keys] not in ('stats', 'statsmaj', 'rate', 'val'):
            # report the offending method (paramstats is a dict at this point)
            raise Exception('The method %s is not implemented' % (paramstats[keys]))

    # requested stats and band number ?
    maxband = max([int(x) for x in list(paramstats.keys())])
    if len(rasters) != 1:
        if nbbands < maxband:
            raise Exception("Band ids in requested stats and number of input rasters "
                            "or bands number of input raster do not correspond")

    # same extent and resolution of input rasters ?
    listres = []
    listextent = []
    if len(rasters) != 1:
        for raster in rasters:
            listres.append(abs(fut.getRasterResolution(raster)[0]))
            listextent.append(fut.getRasterExtent(raster))

    # Shifted comparison : equal only if all items are identical
    if listextent[1:] != listextent[:-1]:
        raise Exception("Input rasters must have same extent")

    if listres[1:] != listres[:-1]:
        raise Exception("Input rasters must have same spatial resolution")

    return paramstats