def iota2Formatting(invector, classes, outvector=""):
    '''Format zonal statistics output to iota2 conventions (column casting and
    renaming) with ogr2ogr.

    Typical upstream call of the whole chain:

    python simplification/ZonalStats.py -wd ~/tmp/
    -inr /work/OT/theia/oso/vincent/testmpi/mini_SAR_pad/final/Classif_Seed_0.tif
         /work/OT/theia/oso/vincent/testmpi/mini_SAR_pad/final/Confidence_Seed_0.tif
         /work/OT/theia/oso/vincent/testmpi/mini_SAR_pad/final/PixelsValidity.tif
    -shape /work/OT/theia/oso/vincent/testmpi/mini_SAR_pad/final/simplification/vectors/dept_1.shp
    -output /work/OT/theia/oso/vincent/outstats_oso.sqlite
    -params 1:rate 2:statsmaj 3:statsmaj
    -classes simplification/nomenclature17.cfg -iota2
    '''
    def Sort(sub_li):
        sub_li.sort(key=lambda x: x[0])
        return sub_li

    nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
    desclasses = nomenc.HierarchicalNomenclature.get_level_values(
        int(nomenc.getLevelNumber() - 1))
    cols = [[x, str(z)] for x, y, w, z in desclasses]
    sortalias = [x[1] for x in Sort(cols)]

    # one "CAST(... ) AS alias" clause per nomenclature alias, sorted by class code
    exp = ""
    for name in sortalias:
        exp += "CAST(%s AS NUMERIC(6,2)) AS %s, " % (name, name)

    if outvector == "":
        layerout = os.path.splitext(os.path.basename(invector))[0]
        outvector = os.path.splitext(invector)[0] + '_tmp.shp'
    else:
        layerout = os.path.splitext(os.path.basename(outvector))[0]

    command = "ogr2ogr -lco ENCODING=UTF-8 -overwrite -q -f 'ESRI Shapefile' -nln %s -sql "\
              "'SELECT CAST(cat AS INTEGER(4)) AS Classe, "\
              "CAST(meanmajb3 AS INTEGER(4)) AS Validmean, "\
              "CAST(stdmajb3 AS NUMERIC(6,2)) AS Validstd, "\
              "CAST(meanmajb2 AS INTEGER(4)) AS Confidence, %s"\
              "CAST(area AS NUMERIC(10,2)) AS Aire "\
              "FROM %s' "\
              "%s %s" % (layerout, exp, layerout, outvector, invector)
    Utils.run(command)

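# Illustrative call of iota2Formatting() (hypothetical paths): the input vector is a
# statistics file produced by zonalstats() below with the parameters
# "1:rate 2:statsmaj 3:statsmaj", and the .cfg file is the nomenclature passed with
# "-classes" in the command line quoted in the docstring above.
def _example_iota2Formatting():
    iota2Formatting("/tmp/outstats_oso.shp",
                    "simplification/nomenclature17.cfg",
                    outvector="/tmp/outstats_oso_formatted.shp")
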
def formatDataFrame(geodataframe, schema, categorical=False, classes="", floatdec=2, intsize=10):
    """Format column names and types of a GeoPandas DataFrame.

    Parameters
    ----------
    geodataframe : GeoPandas DataFrame
        GeoPandas DataFrame
    schema : dict / Fiona schema
        schema giving column names and formats
    categorical : boolean
        if True, use an input nomenclature (Iota2Nomenclature) to rename columns with aliases
    classes : Iota2Nomenclature
        nomenclature description (see nomenclature class for input)
    floatdec : integer
        length of the decimal part of column values
    intsize : integer
        length of the integer part of column values

    Return
    ------
    GeoPandas DataFrame
    GeoPandas schema
    """
    # rename columns when categorical (rate) statistics are expected
    # and a nomenclature file is provided
    if categorical:
        # get multi-level nomenclature
        # TODO : handle several types of input nomenclature (cf. nomenclature class)
        nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
        desclasses = nomenc.HierarchicalNomenclature.get_level_values(
            int(nomenc.getLevelNumber() - 1))
        cols = [(str(x), str(z)) for x, y, w, z in desclasses]

        # rename columns with aliases
        for col in cols:
            geodataframe.rename(columns={col[0]: col[1]}, inplace=True)

    # change column types
    schema['properties'] = OrderedDict([(x, 'float:%s.%s' % (intsize, floatdec))
                                        for x in list(geodataframe.columns)
                                        if x != 'geometry'])

    return geodataframe, schema

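# Illustrative call of formatDataFrame() (hypothetical data): every non-geometry column
# of the Fiona-style schema is re-declared as 'float:<intsize>.<floatdec>'; with
# categorical=True and a nomenclature file, class-code columns would also be renamed
# with their aliases.
def _example_formatDataFrame():
    import geopandas as gpd
    from shapely.geometry import Point

    gdf = gpd.GeoDataFrame({"meanb1": [12.3], "stdb1": [1.2],
                            "geometry": [Point(0.0, 0.0)]})
    schema = {'geometry': 'Point',
              'properties': {'meanb1': 'float', 'stdb1': 'float'}}
    gdf, schema = formatDataFrame(gdf, schema)
    # schema['properties'] is now OrderedDict([('meanb1', 'float:10.2'),
    #                                          ('stdb1', 'float:10.2')])
    return gdf, schema
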
def getMaskRegularisation(classes):
    """Build one BandMath mask expression per first-level class of the nomenclature.

    Each returned item is a list [index, expression, output raster name, flag], the
    expression keeping the pixel value for the classes of the group and setting every
    other pixel to 0; the flag is True when the group holds more than one class.
    """
    nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
    df = nomenc.HierarchicalNomenclature.to_frame().groupby(level=0)

    masks = []
    for idx, key in enumerate(df.groups.keys()):
        # expression of the form "(im1b1==c1 || im1b1==c2)?im1b1:0"
        exp = "(im1b1=="
        listclasses = []
        for elt in df.groups[key].to_frame().groupby(level=1).groups.keys():
            listclasses.append(str(elt[0]))
        exp += " || im1b1==".join(listclasses)
        exp += ")?im1b1:0"
        output = "mask_%s.tif" % (idx)
        masks.append([idx, exp, output, len(df.groups[key]) != 1])

    return masks

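# Sketch of how the expressions returned by getMaskRegularisation() could be
# materialised with OTB BandMath (assumptions: otbcli_BandMath is available in the
# PATH and "classif.tif" stands for the classification raster; Utils.run is the
# module's own command wrapper).
def _example_applyMasks(classes, classif="classif.tif"):
    for _, exp, output, _ in getMaskRegularisation(classes):
        Utils.run('otbcli_BandMath -il %s -out %s -exp "%s"' % (classif, output, exp))
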
def zonalstats(path, rasters, params, output, paramstats, classes="", bufferDist=None,
               gdalpath="", write_ouput=False, gdalcachemax="9000"):
    """Compute zonal statistics (descriptive and categorical) on a multi-band raster
    or on several rasters, based on a Point (buffered or not) or Polygon zonal vector.

    Parameters
    ----------
    path : string
        working directory
    rasters : list
        list of rasters to analyse
    params : list
        vector file to analyse and list of FIDs to process (an empty list means all features)
    output : vector file (sqlite, shapefile or geojson)
        vector file to store statistics
    paramstats : list or dict
        statistics to compute per band or raster position
        (e.g. ["1:stats", "2:rate"] or {1: 'stats', 2: 'rate'})

        - paramstats = {1: "rate", 2: "statsmaj", 3: "statsmaj", 4: "stats", 5: "stats_cl"}
        - stats : mean_b, std_b, max_b, min_b
        - statsmaj : meanmaj, stdmaj, maxmaj, minmaj of the majority class
        - rate : rate of each pixel value (class names)
        - stats_cl : mean_cl, std_cl, max_cl, min_cl of one class
        - val : value of the corresponding pixel (only for Point geometry and without other stats)

    classes : nomenclature file
        nomenclature
    bufferDist : int
        buffer size, in case of a Point zonal vector
    gdalpath : string
        path of the GDAL binaries (for system execution)
    write_ouput : boolean
        if True, warped rasters are stored in the working directory
    gdalcachemax : string
        GDAL cache for the warping operation (in MB)
    """
    # Features and vector file to intersect
    vector, idvals = params

    # Raster resolution
    # TODO : check that all rasters have the same extent and resolution
    res = abs(fut.getRasterResolution(rasters[0])[0])

    # if no vector subsetting (all features)
    if not idvals:
        idvals = getFidList(vector)

    # vector opening, feature iteration and/or geometry buffering
    vectorname = os.path.splitext(os.path.basename(vector))[0]
    vectorgeomtype = vf.getGeomType(vector)
    vectorbuff = None

    # Read statistics parameters
    # (band indices are cast to int so they can be used as positional indices below)
    if isinstance(paramstats, list):
        paramstats = dict([(int(x.split(':')[0]), x.split(':')[1]) for x in paramstats])

    # Value extraction
    if not bufferDist and vectorgeomtype in (1, 4, 1001, 1004):
        if 'val' in paramstats.values():
            if vectorgeomtype == 1:
                schema = {'geometry': 'Point', 'properties': {}}
            elif vectorgeomtype == 4:
                schema = {'geometry': 'MultiPoint', 'properties': {}}
        else:
            raise Exception("Only pixel value extraction available "
                            "when Point geometry without buffer distance is provided")
    # Stats extraction
    else:
        # Point geometry
        if vectorgeomtype in (1, 4, 1001, 1004):
            if vectorgeomtype == 1:
                schema = {'geometry': 'Point', 'properties': {}}
            elif vectorgeomtype == 4:
                schema = {'geometry': 'MultiPoint', 'properties': {}}
            vectorbuff = vectorname + "buff.shp"
            _ = bfo.bufferPoly(vector, vectorbuff, bufferDist=bufferDist)
        # Polygon geometry
        elif vectorgeomtype in (3, 6, 1003, 1006):
            if vectorgeomtype == 3:
                schema = {'geometry': 'Polygon', 'properties': {}}
            elif vectorgeomtype == 6:
                schema = {'geometry': 'MultiPolygon', 'properties': {}}
        else:
            raise Exception("Geometry type of vector file not handled")

    # Vector reading
    dataset = vf.openToRead(vector)
    lyr = dataset.GetLayer()
    spatialref = lyr.GetSpatialRef().ExportToProj4()

    # Prepare stats DataFrame
    stats = definePandasDf(idvals, paramstats, classes)

    # Iterate vector's features (FID)
    for idval in idvals:
        lyr.SetAttributeFilter("FID=" + str(idval))
        feat = lyr.GetNextFeature()
        geom = feat.GetGeometryRef()
        if geom:
            # Insert geometry in DataFrame
            geomdf = pad.DataFrame(index=[idval],
                                   columns=["geometry"],
                                   data=[str(geom.ExportToWkt())])

            # Get Point coordinates (pixel value case)
            if vectorgeomtype in (1, 4, 1001, 1004) and 'val' in paramstats.values():
                xpt, ypt, _ = geom.GetPoint()

            stats.update(geomdf)

            if vectorbuff:
                vector = vectorbuff

            # creation of warped rasters (one per feature and per raster)
            if gdalpath is not None and gdalpath != "":
                if not gdalpath.endswith("/"):
                    gdalpath = gdalpath + "/"
            else:
                gdalpath = ""

            bands = []
            success = True
            for idx, raster in enumerate(rasters):
                # Value extraction
                if 'val' in paramstats.values():
                    if vectorgeomtype not in (1, 4, 1001, 1004):
                        raise Exception("Type of input vector %s must be "
                                        "'Point' for pixel value extraction" % (vector))
                    else:
                        bands.append(raster)
                        tmpfile = raster
                # Stats extraction
                else:
                    tmpfile = os.path.join(path, 'rast_%s_%s_%s' % (vectorname, str(idval), idx))
                    try:
                        # TODO : test GDAL version (>= 2.2.4)
                        if write_ouput:
                            cmd = '%sgdalwarp -tr %s %s -tap -q -overwrite -cutline %s '\
                                  '-crop_to_cutline --config GDAL_CACHEMAX %s -wm %s '\
                                  '-wo "NUM_THREADS=ALL_CPUS" -wo "CUTLINE_ALL_TOUCHED=YES" '\
                                  '-cwhere "FID=%s" %s %s -ot Float32' % (gdalpath,
                                                                          res,
                                                                          res,
                                                                          vector,
                                                                          gdalcachemax,
                                                                          gdalcachemax,
                                                                          idval,
                                                                          raster,
                                                                          tmpfile)
                            Utils.run(cmd)
                        else:
                            gdal.SetConfigOption("GDAL_CACHEMAX", gdalcachemax)
                            tmpfile = gdal.Warp('', raster, xRes=res,
                                                yRes=res, targetAlignedPixels=True,
                                                cutlineDSName=vector, cropToCutline=True,
                                                cutlineWhere="FID=%s" % (idval), format='MEM',
                                                warpMemoryLimit=gdalcachemax,
                                                warpOptions=["NUM_THREADS=ALL_CPUS",
                                                             "CUTLINE_ALL_TOUCHED=YES"])
                        bands.append(tmpfile)
                        success = True
                    except Exception:
                        success = False

            if success:
                # majority-class information, filled by the 'rate' statistics when requested
                classmaj = None
                posclassmaj = None
                for param in paramstats:
                    # Multi-raster / multi-band data preparation
                    if len(rasters) != 1:
                        band = bands[int(param) - 1]
                        nbband = 1
                    else:
                        band = tmpfile
                        nbband = int(param)

                    # Statistics extraction
                    if band:
                        methodstat = paramstats[param]

                        if methodstat == 'rate':
                            classStats, classmaj, posclassmaj = countPixelByClass(band, idval, nbband)
                            stats.update(classStats)

                            # Add columns when pixel values are not identified
                            # in the nomenclature file
                            if list(classStats.columns) != list(stats.columns):
                                newcols = list(set(list(classStats.columns)).difference(
                                    set(list(stats.columns))))
                                stats = pad.concat([stats, classStats[newcols]], axis=1)

                        elif methodstat == 'stats':
                            cols = ["meanb%s" % (int(param)), "stdb%s" % (int(param)),
                                    "maxb%s" % (int(param)), "minb%s" % (int(param))]
                            stats.update(pad.DataFrame(data=[rasterStats(band, nbband)],
                                                       index=[idval],
                                                       columns=cols))

                        elif methodstat == 'statsmaj':
                            if not classmaj:
                                if "rate" in paramstats.values():
                                    # locate the classification band/raster declared as 'rate'
                                    idxbdclasses = [x for x in paramstats
                                                    if paramstats[x] == "rate"][0]
                                    if len(rasters) != 1:
                                        bandrate = bands[idxbdclasses - 1]
                                        nbbandrate = 0
                                    else:
                                        bandrate = band
                                        nbbandrate = idxbdclasses - 1
                                else:
                                    raise Exception("No classification raster provided "
                                                    "to check position of majority class")
                                classStats, classmaj, posclassmaj = countPixelByClass(
                                    bandrate, idval, nbbandrate)
                                classStats = None

                            cols = ["meanmajb%s" % (int(param)), "stdmajb%s" % (int(param)),
                                    "maxmajb%s" % (int(param)), "minmajb%s" % (int(param))]
                            stats.update(pad.DataFrame(data=[rasterStats(band, nbband, posclassmaj)],
                                                       index=[idval],
                                                       columns=cols))

                        elif "stats_" in methodstat:
                            if "rate" in paramstats.values():
                                # get pixel positions of the requested class
                                cl = paramstats[param].split('_')[1]
                                idxbdclasses = [x for x in paramstats
                                                if paramstats[x] == "rate"][0]
                                rastertmp = gdal.Open(bands[idxbdclasses - 1], 0)
                                data = rastertmp.ReadAsArray()
                                posclass = np.where(data == int(cl))
                                data = None
                            else:
                                raise Exception("No classification raster provided "
                                                "to check position of requested class")

                            cols = ["meanb%sc%s" % (int(param), cl), "stdb%sc%s" % (int(param), cl),
                                    "maxb%sc%s" % (int(param), cl), "minb%sc%s" % (int(param), cl)]
                            stats.update(pad.DataFrame(data=[rasterStats(band, nbband, posclass)],
                                                       index=[idval],
                                                       columns=cols))
                        elif "val" in methodstat:
                            colpt, rowpt = fut.geoToPix(band, xpt, ypt)
                            cols = "valb%s" % (param)
                            stats.update(pad.DataFrame(data=[rasterStats(band, nbband, None, (colpt, rowpt))],
                                                       index=[idval],
                                                       columns=[cols]))

                        else:
                            print("The method %s is not implemented" % (paramstats[param]))

                    band = None

                if write_ouput:
                    os.remove(tmpfile)
            else:
                print("gdalwarp problem for feature %s (geometry error, too small area, etc.)" % (idval))

    # Prepare geometry and projection
    stats["geometry"] = stats["geometry"].apply(wkt.loads)
    statsfinal = gpad.GeoDataFrame(stats, geometry="geometry")
    statsfinal.fillna(0, inplace=True)
    # spatialref is the proj4 string exported from the input vector
    statsfinal.crs = spatialref

    # change column names if rate statistics are expected and a nomenclature file is provided
    if "rate" in paramstats.values() and classes != "":
        # get multi-level nomenclature
        nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
        desclasses = nomenc.HierarchicalNomenclature.get_level_values(
            nomenc.getLevelNumber() - 1)
        cols = [(str(x), str(z)) for x, y, w, z in desclasses]

        # rename columns with aliases
        for col in cols:
            statsfinal.rename(columns={col[0]: col[1]}, inplace=True)

    # change column types
    schema['properties'] = OrderedDict([(x, 'float:10.2') for x in list(statsfinal.columns)
                                        if x != 'geometry'])

    # exportation
    # TO TEST
    # TODO : export format depending on the number of columns (shapefile, sqlite, geojson)
    # Check issue on framagit
    convert = False
    outformat = os.path.splitext(output)[1]
    if outformat == ".shp":
        driver = "ESRI Shapefile"
    elif outformat == ".geojson":
        driver = "GeoJSON"
    elif outformat == ".sqlite":
        driver = "ESRI Shapefile"
        convert = True
    else:
        raise Exception("The output format '%s' is not handled" % (outformat[1:]))

    if not convert:
        statsfinal.to_file(output, driver=driver, schema=schema, encoding='utf-8')
    else:
        # write an intermediate shapefile, then convert it to SQLite with ogr2ogr
        outputinter = os.path.splitext(output)[0] + '.shp'
        statsfinal.to_file(outputinter, driver=driver, schema=schema, encoding='utf-8')
        output = os.path.splitext(output)[0] + '.sqlite'
        Utils.run('ogr2ogr -f SQLite %s %s' % (output, outputinter))

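# Minimal zonalstats() driver (hypothetical relative paths), mirroring the command line
# quoted in the iota2Formatting docstring: three co-registered rasters (classification,
# confidence, validity), one department vector, and an empty FID list meaning that all
# features are processed.
def _example_zonalstats():
    rasters = ["final/Classif_Seed_0.tif",
               "final/Confidence_Seed_0.tif",
               "final/PixelsValidity.tif"]
    params = ["final/simplification/vectors/dept_1.shp", []]
    zonalstats("/tmp", rasters, params, "/tmp/outstats_oso.sqlite",
               ["1:rate", "2:statsmaj", "3:statsmaj"],
               classes="simplification/nomenclature17.cfg")
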
def definePandasDf(idvals, paramstats={}, classes=""):
    """Define a DataFrame (columns and index values) based on the expected statistics
    and the zonal vector.

    Parameters
    ----------
    idvals : list
        list of FIDs to analyse (DataFrame index)
    paramstats : dict
        statistics to compute (e.g. {1:'stats', 2:'rate'})
    classes : nomenclature file
        nomenclature

    Return
    ------
    geopandas.GeoDataFrame
    """
    cols = []
    for param in paramstats:
        if paramstats[param] == "rate":
            # one column per class of the last nomenclature level
            if classes != "":
                nomenc = nomenclature.Iota2Nomenclature(classes, 'cfg')
                desclasses = nomenc.HierarchicalNomenclature.get_level_values(
                    nomenc.getLevelNumber() - 1)
                cols += [str(x) for x, y, w, z in desclasses]
        elif paramstats[param] == "stats":
            cols += ["meanb%s" % (param), "stdb%s" % (param),
                     "maxb%s" % (param), "minb%s" % (param)]
        elif paramstats[param] == "statsmaj":
            cols += ["meanmajb%s" % (param), "stdmajb%s" % (param),
                     "maxmajb%s" % (param), "minmajb%s" % (param)]
        elif "stats_" in paramstats[param]:
            cl = paramstats[param].split('_')[1]
            cols += ["meanb%sc%s" % (param, cl), "stdb%sc%s" % (param, cl),
                     "maxb%sc%s" % (param, cl), "minb%sc%s" % (param, cl)]
        elif "val" in paramstats[param]:
            cols.append("valb%s" % (param))
        else:
            raise Exception("The method %s is not implemented" % (paramstats[param]))

    cols.append('geometry')

    return gpad.GeoDataFrame(np.nan, index=idvals, columns=cols)

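# Quick illustration of definePandasDf(): for two features and two statistics requests
# (no nomenclature needed here), the returned GeoDataFrame is filled with NaN and owns
# one column per expected statistic plus 'geometry'.
def _example_definePandasDf():
    df = definePandasDf([0, 1], paramstats={1: 'stats', 2: 'statsmaj'})
    # df.columns: meanb1, stdb1, maxb1, minb1,
    #             meanmajb2, stdmajb2, maxmajb2, minmajb2, geometry
    return df
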