def segQuality(inVector, inImage):
    # open the vector
    lyr = fiona.open(inVector)
    features = [x for x in lyr]
    values = np.zeros([len(features), 5], dtype=float)
    # loop over features
    for i in range(len(features)):
        geometry1 = shape(features[i]['geometry'])
        restFeatures = features[:i] + features[(i + 1):]
        value = zonal_stats(geometry1, inImage, stats=['count'],
                            add_stats={'mymean': mymean, 'myvarianza': varianza})
        df = pd.DataFrame.from_dict(value, orient='columns', dtype=None)
        for j in range(len(restFeatures)):
            geometry2 = shape(restFeatures[j]['geometry'])
            if geometry2.intersects(geometry1):
                # neighbouring segment touches the current one
                value = zonal_stats(geometry2, inImage, stats=['count'],
                                    add_stats={'mymean': mymean, 'myvarianza': varianza})
                df = pd.concat([df, pd.DataFrame.from_dict(value, orient='columns', dtype=None)],
                               ignore_index=True)
        values[i, 0] = df.iloc[0, 0]          # count
        values[i, 1] = df.iloc[0, 1]          # mean
        values[i, 2] = df.iloc[0, 2]          # myvarianza
        values[i, 3] = np.var(df.iloc[:, 1])  # variance between segments
        values[i, 4] = len(df.iloc[1:])       # neighbours
    # get overall values
    intraVarWeighted = np.nansum(values[:, 0] * values[:, 2]) / np.nansum(values[:, 0])
    interVarWeighted = np.nansum(values[:, 4] * values[:, 3]) / np.nansum(values[:, 4])
    normVariance = (intraVarWeighted - interVarWeighted) / (intraVarWeighted + interVarWeighted)
    numberSegments = len(values[:, 4])
    return intraVarWeighted, interVarWeighted, normVariance, numberSegments

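# `mymean` and `varianza` above are user-supplied callables for rasterstats'
# add_stats hook, which hands each one the zone's masked array. They are not
# defined in this snippet; a minimal sketch consistent with that API:
import numpy as np

def mymean(x):
    # mean of the zone's valid (unmasked) pixels
    return np.ma.mean(x)

def varianza(x):
    # variance of the zone's valid (unmasked) pixels
    return np.ma.var(x)
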
def test_nan_counts():
    from affine import Affine
    transform = Affine(1, 0, 1, 0, -1, 3)
    data = np.array([
        [np.nan, np.nan, np.nan],
        [0, 0, 0],
        [1, 4, 5]
    ])
    # geom extends an additional row to the left
    geom = 'POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))'
    # nan stat is requested
    stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*")
    for res in stats:
        assert res['count'] == 3   # 3 pixels of valid data
        assert res['nodata'] == 3  # 3 pixels of nodata
        assert res['nan'] == 3     # 3 pixels of nans
    # nans are ignored if the nan stat is not requested
    stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="count nodata")
    for res in stats:
        assert res['count'] == 3   # 3 pixels of valid data
        assert res['nodata'] == 3  # 3 pixels of nodata
        assert 'nan' not in res

def test_mini_raster():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, raster_out=True)
    stats2 = zonal_stats(polygons, stats[0]['mini_raster_array'],
                         raster_out=True, affine=stats[0]['mini_raster_affine'])
    assert (stats[0]['mini_raster_array'] == stats2[0]['mini_raster_array']).sum() == \
        stats[0]['count']

def test_nopoints():
    with pytest.raises(TypeError):
        shapely_to_ogr_type('Point')
    with pytest.raises(TypeError):
        shapely_to_ogr_type('MultiPoint')
    zonal_stats(geoms, raster, global_src_extent=True)

def test_mini_raster():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, raster_out=True)
    stats2 = zonal_stats(polygons, stats[0]['mini_raster'],
                         raster_out=True, transform=stats[0]['mini_raster_GT'])
    assert (stats[0]['mini_raster'] == stats2[0]['mini_raster']).sum() == \
        stats[0]['count']

def test_band_alias():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats_a = zonal_stats(polygons, raster)
    stats_b = zonal_stats(polygons, raster, band=1)
    with pytest.deprecated_call():
        stats_c = zonal_stats(polygons, raster, band_num=1)
    assert stats_a[0]['count'] == stats_b[0]['count'] == stats_c[0]['count']

def test_copy_properties_warn():
    polygons = os.path.join(DATA, 'polygons.shp')
    # run once to trigger any other unrelated deprecation warnings
    # so the test does not catch them instead
    stats_a = zonal_stats(polygons, raster)
    with pytest.deprecated_call():
        stats_b = zonal_stats(polygons, raster, copy_properties=True)
    assert stats_a == stats_b

def test_mini_raster():
    from geopandas import GeoDataFrame
    polygons = os.path.join(DATA, 'polygons.shp')
    df = GeoDataFrame.from_file(polygons)
    stats = zonal_stats(df.geometry, raster, raster_out=True)
    stats2 = zonal_stats(df.geometry, stats[0]['mini_raster'],
                         raster_out=True, transform=stats[0]['mini_raster_GT'])
    assert (stats[0]['mini_raster'] == stats2[0]['mini_raster']).sum() == \
        stats[0]['count']

def test_transform():
    with rasterio.open(raster) as src:
        arr = src.read(1)
        affine = src.transform
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, arr, affine=affine)
    stats2 = zonal_stats(polygons, arr, transform=affine.to_gdal())
    assert stats == stats2
    pytest.deprecated_call(zonal_stats, polygons, raster, transform=affine.to_gdal())

def test_direct_features_collections():
    polygons = os.path.join(DATA, 'polygons.shp')
    features = read_features(polygons)
    collection = read_featurecollection(polygons)
    stats_direct = zonal_stats(polygons, raster)
    stats_features = zonal_stats(features, raster)
    stats_collection = zonal_stats(collection, raster)
    assert stats_direct == stats_features == stats_collection

def test_range():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, stats="range min max")
    for stat in stats:
        assert stat['range'] == stat['max'] - stat['min']
    ranges = [x['range'] for x in stats]
    # without min/max specified
    stats = zonal_stats(polygons, raster, stats="range")
    assert 'min' not in stats[0]
    assert ranges == [x['range'] for x in stats]

def test_featurecollection():
    from geopandas import GeoDataFrame
    polygons = os.path.join(DATA, 'polygons.shp')
    df = GeoDataFrame.from_file(polygons)
    assert df.__geo_interface__['type'] == 'FeatureCollection'
    stats = zonal_stats(polygons, raster)
    # geointerface featurecollection
    stats2 = zonal_stats(df, raster)
    assert stats == stats2

def test_ndarray_affine():
    polygons = os.path.join(DATA, 'polygons.shp')
    arr, gt = _get_raster_array_gt(raster)
    stats1 = zonal_stats(polygons, arr, transform=gt)
    from affine import Affine
    atrans = Affine.from_gdal(*gt)
    stats2 = zonal_stats(polygons, arr, transform=atrans)
    assert stats1[0]['count'] == stats2[0]['count']
    stats3 = zonal_stats(polygons, arr, affine=gt)
    assert stats1[0]['count'] == stats3[0]['count']

def test_geodataframe_zonal():
    polygons = os.path.join(DATA, 'polygons.shp')
    try:
        import geopandas as gpd
        df = gpd.read_file(polygons)
        if not hasattr(df, '__geo_interface__'):
            pytest.skip("This version of geopandas doesn't support df.__geo_interface__")
    except ImportError:
        pytest.skip("Can't import geopandas")
    expected = zonal_stats(polygons, raster)
    assert zonal_stats(df, raster) == expected

def test_ndarray():
    arr, gt = _get_raster_array_gt(raster)
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, arr, transform=gt)
    assert stats == zonal_stats(polygons, raster)
    assert stats[0]['count'] == 75
    assert stats[1]['count'] == 50
    points = os.path.join(DATA, 'points.shp')
    stats = zonal_stats(points, arr, transform=gt)
    assert stats == zonal_stats(points, raster)
    assert sum([x['count'] for x in stats]) == 3
    assert round(stats[0]['mean'], 3) == 11.386
    assert round(stats[1]['mean'], 3) == 35.547

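# The two ndarray tests above call a module-level helper `_get_raster_array_gt`
# that is not shown in this file. A minimal sketch of what it presumably does
# (read band 1 and return the array plus a GDAL-style geotransform tuple);
# the exact implementation in the original test module may differ:
def _get_raster_array_gt(raster):
    import rasterio
    with rasterio.open(raster) as src:
        arr = src.read(1)             # first band as a 2D ndarray
        gt = src.transform.to_gdal()  # GDAL geotransform tuple (c, a, b, f, d, e)
    return arr, gt
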
def zonalStats(zonesShape, raster, reclassTable, statsResultTable):
    # max is count * maxVal, where maxVal is the largest 'to' value in the reclassTable
    [lowerLimitList, upperLimitList, toValueList] = reclassCSVColumns(reclassTable)
    toValueList.sort()
    maxVal = toValueList.pop()
    # rasterstats zonal_stats returns a list with one dictionary per feature in the zonesShape vector
    zs = zonal_stats(zonesShape, raster, geojson_out=True, stats=['count', 'sum'])
    l = []
    # for each dictionary in the zs list returned by rasterstats
    ds = ogr.Open(zonesShape)
    sqlString = "SELECT FID, OGR_GEOM_AREA, DN FROM {0}".format(
        os.path.splitext(os.path.basename(zonesShape))[0])
    layer = ds.ExecuteSQL(sqlString)
    for d in zs:
        feature = layer.GetFeature(int(d['id']))
        sqM = feature.GetField("OGR_GEOM_AREA")
        l.append((d['id'], sqM, d['properties']['DN'], d['properties']['sum'],
                  d['properties']['count'], int(d['properties']['count']) * maxVal))
    with open(statsResultTable, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(('Unique region ID', 'Area in square meters', 'Raster value',
                         'Sum', 'Count', 'Count*maxVal'))
        for tup in l:
            writer.writerow(tup)

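# `reclassCSVColumns` is assumed here to read a three-column reclass CSV
# (lower limit, upper limit, 'to' value) into parallel lists. A plausible
# sketch, assuming numeric values and no header row:
import csv

def reclassCSVColumns(reclassTable):
    lower, upper, to_values = [], [], []
    with open(reclassTable, newline='') as f:
        for row in csv.reader(f):
            lower.append(float(row[0]))
            upper.append(float(row[1]))
            to_values.append(float(row[2]))
    return [lower, upper, to_values]
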
def test_prefix():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, prefix="TEST")
    for key in ['count', 'min', 'max', 'mean']:
        assert key not in stats[0]
    for key in ['TESTcount', 'TESTmin', 'TESTmax', 'TESTmean']:
        assert key in stats[0]

def test_categorical():
    polygons = os.path.join(DATA, 'polygons.shp')
    categorical_raster = os.path.join(DATA, 'slope_classes.tif')
    stats = zonal_stats(polygons, categorical_raster, categorical=True)
    assert len(stats) == 2
    assert stats[0][1.0] == 75
    assert 5.0 in stats[1]

def zs_h2o(gis_path, net_data):
    """Return zonal stats of raster data from a shapefile."""
    # Affine transformation information:
    #   a = width of a pixel
    #   b = row rotation (typically zero)
    #   c = x-coordinate of the upper-left corner of the upper-left pixel
    #   d = column rotation (typically zero)
    #   e = height of a pixel (typically negative)
    #   f = y-coordinate of the upper-left corner of the upper-left pixel
    # These were taken from the MACA netCDF file
    a = 0.0417
    b = 0
    c = -116.6056 - a
    d = 0
    e = -0.0417
    f = 49.3127 - e
    aff = Affine(a, b, c, d, e, f)
    # Get zone stats for climate divisions
    stats = ['min', 'max', 'mean', 'median', 'count', 'std']
    zs = zonal_stats(gis_path, net_data, affine=aff, stats=stats)
    return zs

def calculate_pop_value(node, raster_array, affine):
    stats = zonal_stats(node.polygon, raster_array, affine=affine,
                        stats="sum", nodata=-1)
    total = stats[0]['sum']
    if total:
        return total
    else:
        return 0

def test_geojson_out():
    polygons = os.path.join(DATA, 'polygons.shp')
    features = zonal_stats(polygons, raster, geojson_out=True)
    for feature in features:
        assert feature['type'] == 'Feature'
        assert 'id' in feature['properties']     # from orig
        assert 'count' in feature['properties']  # from zonal stats

def compute_gridnav(self, geometries, grid):
    """Figure out how these geometries map to our grid

    Args:
      grid (numpy.ndarray): the array to sample values for
      geometries (geopandas.GeoSeries): geometries to compute over,
        this should not change over the lifetime of this object
    """
    if geometries is None:
        _LOG.warning("Cowardly refusing to compute gridnav with None geometries")
        return
    # TODO: check nodata usage here
    zs = zonal_stats(geometries, grid, affine=self.affine, nodata=-1,
                     all_touched=True, raster_out=True)
    (gridysz, gridxsz) = grid.shape
    # print("in grid size y: %s x: %s" % (gridysz, gridxsz))
    for entry in zs:
        aff = entry['mini_raster_affine']
        # print(aff)
        x0 = int((aff.c - self.affine.c) / self.affine.a)
        y0 = int((self.affine.f - aff.f) / abs(self.affine.e))
        (ysz, xsz) = entry['mini_raster_array'].mask.shape
        mask = entry['mini_raster_array'].mask
        # print("IN: x0: %s y0: %s xsz: %s ysz: %s" % (x0, y0, xsz, ysz))
        if x0 >= gridxsz or y0 >= gridysz:
            # print("out of bounds, skipping")
            self.gridnav.append(None)
            continue
        if x0 < 0:
            mask = mask[:, abs(x0):]
            xsz -= abs(x0)
            x0 = 0
        if (x0 + xsz) >= gridxsz:
            clipx = (x0 + xsz) - gridxsz
            # print('clipping %s x points' % (clipx, ))
            mask = mask[:, :(0 - clipx)]
            xsz -= clipx
        if y0 < 0:
            mask = mask[abs(y0):, :]
            ysz -= abs(y0)
            y0 = 0
        if (y0 + ysz) >= gridysz:
            clipy = (y0 + ysz) - gridysz
            # print('clipping %s y points' % (clipy, ))
            mask = mask[:(0 - clipy), :]
            ysz -= clipy
        # TODO: likely need some more thought above to prevent this
        if ysz < 0 or xsz < 0:
            self.gridnav.append(None)
            continue
        # print("OUT: x0: %s y0: %s xsz: %s ysz: %s" % (x0, y0, xsz, ysz))
        self.gridnav.append(
            GRIDINFO(x0=x0, y0=y0, xsz=xsz, ysz=ysz, mask=mask))

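# `GRIDINFO` is used above but not defined in this snippet; it is presumably a
# simple record of a geometry's window into the grid. A minimal sketch:
from collections import namedtuple

GRIDINFO = namedtuple("GRIDINFO", ["x0", "y0", "xsz", "ysz", "mask"])
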
def test_iterable_geolike():
    reader = shapefile.Reader(os.path.join(DATA, 'polygons.shp'))
    geoms = [x.shape.__geo_interface__ for x in reader.shapeRecords()]
    stats = zonal_stats(geoms, raster)
    assert len(stats) == 2
    assert stats[0]['count'] == 75
    assert stats[1]['count'] == 50

def rasterstats_statistics(self, admin_vect_file):
    from rasterstats import zonal_stats
    stats = zonal_stats(admin_vect_file, self.population_raster)
    print(stats)
    return "Statistics computed for cyclone categories and population....\n"

def main():
    for t in tifs:
        stats = rasterstats.zonal_stats(inshp, t, stats=['sum'], copy_properties=True)
        df = pd.DataFrame(stats)
        df.set_index("BasinID", inplace=True)
        df.sort_index(inplace=True)
        df.to_csv(t[:-4] + '.csv')

def coverage(raster, regions, key_property=None):
    if key_property is None:
        key_property = 'id'
    with rasterio.open(raster) as r:
        res = r.res
        cell_area = res[0] * res[1]

    def _not_nodata(x):
        return (~x.mask).sum()

    with fiona.open(regions) as features:
        # no built-in stats requested; only the custom pixel count
        stats = zonal_stats(
            features, raster,
            stats='',
            add_stats={'count': _not_nodata},
            geojson_out=True)

    def _coverage(item):
        count = item['properties']['count']
        total_area = shapely.geometry.shape(item['geometry']).area
        return count * cell_area / total_area

    result = {
        item['properties'][key_property]: _coverage(item)
        for item in stats}
    return result

def test_some_nodata():
    polygons = os.path.join(DATA, 'polygons.shp')
    raster = os.path.join(DATA, 'slope_nodata.tif')
    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
    assert stats[0]['nodata'] == 36
    assert stats[0]['count'] == 39
    assert stats[1]['nodata'] == 19
    assert stats[1]['count'] == 31

def test_main():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster)
    for key in ['__fid__', 'count', 'min', 'max', 'mean']:
        assert key in stats[0]
    assert len(stats) == 2
    assert stats[0]['count'] == 75
    assert stats[1]['count'] == 50

def test_percentile_good():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster,
                        stats="median percentile_50 percentile_90")
    assert 'percentile_50' in stats[0].keys()
    assert 'percentile_90' in stats[0].keys()
    assert stats[0]['percentile_50'] == stats[0]['median']
    assert stats[0]['percentile_50'] <= stats[0]['percentile_90']

def test_all_nodata():
    polygons = os.path.join(DATA, 'polygons.shp')
    raster = os.path.join(DATA, 'all_nodata.tif')
    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
    assert stats[0]['nodata'] == 75
    assert stats[0]['count'] == 0
    assert stats[1]['nodata'] == 50
    assert stats[1]['count'] == 0

def zonal_stats_by_basin(raster, shapefile, output_dir, output_crs=3762):
    output_projection = CRS.from_epsg(output_crs)
    output_filename = f'stats_{os.path.basename(raster[:-4])}.geojson'
    with rio.open(raster) as src:
        # Read shapefile
        gdf_unprojected = gpd.read_file(shapefile)
        # Reproject to the same CRS as the input raster
        gdf = gdf_unprojected.to_crs(src.crs)
    # Clean empty and null geometries
    gdf = gdf.loc[~(gdf.geometry.is_empty | gdf.geometry.isna())]
    # Explode MultiPolygons to turn them into Polygons
    gdf = gdf.loc[gdf.geometry.geom_type == 'MultiPolygon'].explode().reset_index(drop=True)
    # Create a new 'mean' attribute populated from zonal_stats
    gdf = gpd.GeoDataFrame.from_features(
        zonal_stats(gdf, raster, geojson_out=True, stats='mean', nodata=-9999),
        crs=gdf.crs)
    # Reproject GeoDataFrame to output_crs
    gdf.to_crs(epsg=output_crs, inplace=True)
    # Write the new GeoJSON file to output_dir
    print(os.path.join(output_dir, output_filename))
    gdf.to_file(os.path.join(output_dir, output_filename), driver='GeoJSON')
    return True

def intersect_network(network, network_details, hazard_details):
    sector = network_details['sector']
    node_or_edge = network_details['node_or_edge']
    hazard_path = hazard_details['path']
    model = hazard_details['model']
    return_period = hazard_details['r_period']
    all_stats = zonal_stats(network, hazard_path, stats=['max'])
    for stats, element in zip(all_stats, network):
        max_ = stats['max']
        if max_ is not None and 0 < max_ < 999:
            yield (
                node_or_edge,
                sector,
                str(element['properties']['id']),
                model,
                str(int(return_period)),
                str(max_),
                element['properties']['highway']
            )

def fast_append_profile_in_gdf(geodataframe, raster_path, force_crs_match=True):
    """Append the columns of the raster profile to a GeoDataFrame, taking
    advantage of rasterstats.

    Parameters
    ----------
    geodataframe : geopandas.GeoDataFrame
        a GeoDataFrame that overlays the raster. The variables of the profile
        will be appended to this data. If some polygons do not overlay the
        raster, consider a preprocessing step using the function
        subset_gdf_polygons_from_raster.
    raster_path : str
        the path to the associated raster image.
    force_crs_match : bool, Default is True.
        Whether the Coordinate Reference System (CRS) of the polygons will be
        reprojected to the CRS of the raster file. It is recommended to leave
        this argument as True.

    Notes
    -----
    The generated geodataframe will input the value 0 for each Type that is
    not present in the raster for each polygon.
    """
    _check_presence_of_crs(geodataframe)
    raster_path = fetch_quilt_path(raster_path)
    if force_crs_match:
        with rasterio.open(raster_path) as raster:
            geodataframe = geodataframe.to_crs(crs=raster.crs.data)
    else:
        warnings.warn(
            "The GeoDataFrame is not being reprojected. The clipping might be "
            "performed on polygons that do not match the raster."
        )
    zonal_gjson = rs.zonal_stats(geodataframe, raster_path, prefix="Type_",
                                 geojson_out=True, categorical=True)
    zonal_ppt_gdf = GeoDataFrame.from_features(zonal_gjson)
    return zonal_ppt_gdf

def rasterStatistics2(feature, featureID, inRaster, calculateStat):
    # https://pypi.python.org/pypi/rasterstats/0.3.2
    # http://www.lfd.uci.edu/~gohlke/pythonlibs/#rasterio
    from rasterstats import zonal_stats
    try:
        # convert feature to temp shapefile
        arcpy.FeatureClassToFeatureClass_conversion(feature, tempFolder, 'temp.shp')
        # get values
        stats = zonal_stats(tempFolder + '/temp.shp', inRaster, nodata=0,
                            stats=[calculateStat.lower()])
        data = ResultObj(featureID, stats[0])
        return data
    except Exception:
        tb = format_exc()
        raise Exception(tb)

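# `ResultObj`, `tempFolder`, and `format_exc` come from the surrounding module,
# which is not shown here. The traceback import is certain from the usage; the
# class below is only a plausible minimal stand-in (an id/stats container):
from traceback import format_exc

class ResultObj(object):
    def __init__(self, featureID, stats):
        self.featureID = featureID
        self.stats = stats
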
def test_iterable_features_geo():
    # pyshp doesn't do feature-level geo_interface so we need to construct it
    reader = shapefile.Reader(os.path.join(DATA, 'polygons.shp'))
    features = []

    class FeatureThing(object):
        pass

    fields = reader.fields[1:]
    field_names = [field[0] for field in fields]
    for sr in reader.shapeRecords():
        geom = sr.shape.__geo_interface__
        atr = dict(list(zip(field_names, sr.record)))
        obj = FeatureThing()
        obj.__geo_interface__ = dict(geometry=geom, properties=atr, type="Feature")
        features.append(obj)
    stats = zonal_stats(features, raster)
    assert len(stats) == 2
    assert stats[0]['count'] == 75
    assert stats[1]['count'] == 50

def cmip_exog_var_timeseries_creator(path_to_cmip6_data, path_to_save_folder,
                                     exogenous_variable, country, region,
                                     polygon, rcp):
    csv_file_name = (path_to_save_folder + "/" + exogenous_variable +
                     "_predictions/NEX_GDDP_yearly_" + rcp + "_" +
                     exogenous_variable + "_Timeseries_" + country + "_" +
                     region + ".csv")
    if os.path.isfile(csv_file_name):
        print("File at path: ", csv_file_name, " exists already")
        return
    files_list = sorted(os.listdir(path_to_cmip6_data))
    years_to_predict = 81
    files_list = [x for x in files_list if rcp in x]
    files_list = [x for x in files_list if exogenous_variable in x]
    time_series = {"Year": [], "Value": []}
    for fname in files_list:
        temp = zonal_stats(polygon, path_to_cmip6_data + fname, stats="mean")
        if exogenous_variable == "prec":
            # scale the mean precipitation value by seconds per year / 10
            time_series["Value"].append(temp[0]["mean"] * 31556926 / 10)
        else:
            # convert Kelvin to degrees Celsius
            time_series["Value"].append(temp[0]["mean"] - 273)
    starting_year = 2019
    years = list(range(starting_year, starting_year + years_to_predict))
    for year in years:
        time_series["Year"].append(str(year))
    zd = zip(*time_series.values())
    with open(csv_file_name, 'w') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(time_series.keys())
        writer.writerows(zd)
    return

def historical_index_timeseries_creator(path_to_hist_index_data, path_to_save_folder,
                                        index_name, country, region, polygon):
    csv_file_name = (path_to_save_folder + "/" + index_name + "_historical/Historical_" +
                     index_name + "_Timeseries_" + country + "_" + region + ".csv")
    if os.path.isfile(csv_file_name):
        print("File at path: ", csv_file_name, " exists already")
        return
    files_list = sorted(os.listdir(path_to_hist_index_data))
    time_series = {"Year": [], "Value": []}
    for fname in files_list[:-1]:
        date = fname[-8:-4]  # the year is encoded at the end of the file name
        temp = zonal_stats(polygon, path_to_hist_index_data + fname, stats="mean")
        time_series["Year"].append(date)
        time_series["Value"].append(temp[0]["mean"])
    zd = zip(*time_series.values())
    with open(csv_file_name, 'w') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(time_series.keys())
        writer.writerows(zd)
    return

def extract_mean(codename, shp, raster_list, classes):
    extract_by_mask = []
    for feature in classes:
        polygon = shp.loc[(shp[str(codename)] == feature)]
        polygon_bbox = polygon.total_bounds
        for raster in raster_list:
            data = raster[-8:-4]  # the raster file name encodes the year
            tiff_rec = rasterio.open(raster)
            window = tiff_rec.window(*polygon_bbox)
            tiff_rec_np = tiff_rec.read(1, window=window)
            transform = tiff_rec.window_transform(window)
            stats = zonal_stats(polygon, tiff_rec_np, affine=transform)
            for i in stats:
                media = i['mean']
                lista = [feature, data, media]
                extract_by_mask.append(lista)
    return extract_by_mask

def get_zonal_stats_from_point(self, point: Vector3) -> List[Optional[Dict]]:
    results = []
    if self.boundary_data:
        for plugin in (x for x in (self.terrain_data, self.attribute_data,
                                   self.flow_dir_data, self.flow_dir_data)
                       if x is not None):
            if plugin is self.terrain_data:
                var = self._elevation_attribute.selected
            elif plugin is self.attribute_data:
                var = self._attribute.selected
            else:
                var = ''
            raster = plugin.get_data(var)
            affine = plugin.affine
            res = plugin.resolution
            var_stats = plugin.variable_stats(var)
            nodata = var_stats.nodata_value
            # Transform point coordinates to the CRS of the raster
            p = shapely.geometry.Point(
                *transform.xy(affine, point.x / res, point.y / res))
            zones = []
            for feat in self.boundary_data.get_features():
                if shapely.geometry.shape(feat['geometry']).contains(p):
                    zones.append(feat)
            # Retrieve zonal stats for this raster
            result = zonal_stats(zones, raster, affine=affine, nodata=nodata,
                                 add_stats=self.zonal_stats)
            for j, row in enumerate(result):
                row['Name'] = "{} (Zone {})".format(
                    plugin.data_name, zones[j].get('id'))
                results.append(row)
    return results

def calculate_column_change(pop_raster, poly_layer):
    """Calculate flood impact per isochrone for each column."""
    pop_data = []
    poly_layer = poly_layer[[
        'counter_1', 'cap_int_1', 'counter_2', 'cap_int_2', 'geometry'
    ]]
    for poly_idx_n, poly_cap_n, poly_idx_2, poly_cap_2, poly_geom in zip(
            poly_layer['counter_1'], poly_layer['cap_int_1'],
            poly_layer['counter_2'], poly_layer['cap_int_2'],
            poly_layer['geometry']):
        feature = gpd.GeoSeries([poly_geom]).to_json()
        # drop geometries with an area size smaller than 1m²
        if area(mapping(poly_geom)) > 0.000009039:
            # calculate population for the given area
            pop_stats = zonal_stats(feature, pop_raster, stats=['sum'])
            poly_area = area(mapping(poly_geom)) / 1e+6  # in square kilometers
            pop_data.append([
                poly_idx_n, poly_cap_n, poly_idx_2, poly_cap_2,
                pop_stats[0]['sum'], poly_area, poly_geom
            ])
    df = pd.DataFrame(pop_data,
                      columns=[
                          'counter_1', 'cap_int_1', 'counter_2', 'cap_int_2',
                          'pop', 'area', 'geometry'
                      ])
    df = df.dropna(subset=['pop'])
    df['pop_area'] = df['pop'] / df['area']  # population density
    df['cap_pop'] = (df['cap_int_1'] / df['pop']) * 100000
    df['cap_dens'] = df['cap_int_1'] / df['pop_area']
    df['cap_dens_2'] = df['cap_int_2'] / df['pop_area']
    df['cap_dens_d'] = df['cap_dens_2'] - df['cap_dens']
    df = df.replace([np.inf, -np.inf], np.nan)
    result_geodf = gpd.GeoDataFrame(df, geometry='geometry')
    return result_geodf

def do_zonal_stats(buffs, raster, csv_pth, num, log_out, stats):
    try:
        u.verify_dir(csv_pth)
        dataset = raster.split('\\')[-2].replace(' ', '_')
        raster_name_to_csv = os.path.basename(raster).replace('.tif', '.csv')
        csv_name = '__'.join([dataset, raster_name_to_csv])
        csv_out = os.path.join(csv_pth, csv_name)
        start = timer()
        u.write_to_log('  {}) Raster: {}'.format(num, os.path.basename(raster)), log_out)
        stats = zonal_stats(buffs, raster, stats=stats, geojson_out=True)
        print('    zonal_stats... ({} sec.)'.format(round(timer() - start, 2)))
        start = timer()
        attributes = []
        for item in stats:
            # print('{}'.format(item['properties']))
            attributes.append(item['properties'])
        print('    append dicts... ({} sec.)'.format(round(timer() - start, 2)))
        start = timer()
        with open(csv_out, 'w', newline='') as outfile:
            fp = csv.DictWriter(outfile, attributes[0].keys())
            fp.writeheader()
            fp.writerows(attributes)
        print('    write to csv... ({} sec.)'.format(round(timer() - start, 2)))
        u.write_to_log('  CSV file: {}'.format(csv_out), log_out)
        u.write_to_log('  Log file: {}'.format(log_out), log_out)
    except Exception as e:
        u.write_to_log(str(e), log_out)
    u.write_to_log('FINISH BUFFERS: {}'.format(time.strftime("%Y-%m-%d %H:%M:%S")), log_out)

def extractarray(rasterpath, shpfile, tile, projectname, ui):
    date = os.path.split(rasterpath)[-1].split('_')[1][:8]
    band = os.path.splitext(os.path.split(rasterpath)[-1])[0].split('_')[-2]
    print('arrayextractor started')
    a = zonal_stats(shpfile, rasterpath, stats=['mean'], band=1,
                    geojson_out=True, all_touched=False, raster_out=True,
                    nodata=np.nan)
    myarrays = []
    for x in a:
        myarray = x['properties']['mini_raster_array']
        myarray = myarray.filled(-9999)
        myarray = myarray[myarray != -9999]
        myarray = myarray.flatten()
        if np.count_nonzero(myarray) == 0:
            continue
        count = len(myarray)
        myid = [x['properties'][ui.idname]]
        arr = myarray.tolist()
        myid.extend(arr)
        arr = myid
        meta = extractmeta(rasterpath, myid[0], date, count, projectname, band, tile)
        myarrays.append(arr)
    tocsv(date, band, myarrays, tile, projectname)

def corine_stats(root, burnt_areas, corine_path):
    os.chdir(root)
    ba = burnt_areas
    list_columns = [
        'id', 'COUNTRY', 'AREA_HA', 'BROADLEA', 'CONIFER', 'MIXED',
        'SCLEROPH', 'TRANSIT', 'OTHERNATLC', 'AGRIAREAS', 'ARTIFSURF',
        'OTHERLC', 'PERCNA2K'
    ]
    ba = ba[list_columns]
    f = csv.reader(open('corine_key.csv'))
    corine_dict = {}
    for row in f:
        corine_dict[row[0]] = row[1]
    corine_dict = {int(k): v for k, v in corine_dict.items()}
    ba_raw = zonal_stats(ba, corine_path, categorical=True)
    df_ba_raw = pd.DataFrame.from_dict(ba_raw)
    df_ba_raw = df_ba_raw.rename(columns=corine_dict)
    df_ba_raw = df_ba_raw.T
    df_ba_raw.to_csv('corine_ba_raw.csv')
    df_mapped = df_ba_raw.groupby(df_ba_raw.index).sum()
    df_mapped = df_mapped.T
    df_mapped.to_csv('corine_ba_raw2.csv')
    df = df_mapped
    print('--PM')
    print(ba.columns)
    l = ba['id'].to_list()
    a = ba['AREA_HA'].to_list()
    df = df.assign(id=l, area=a)
    # df.to_csv('__df_test.csv')
    df_merge = df.merge(ba[['id', 'COUNTRY']], how='inner', on='id')
    df_group = df_merge.groupby(['COUNTRY']).sum()
    df_group.to_csv('__test_group.csv')
    print('\n----- df group columns\n')
    print(df_group.columns)
    print('---end')

def _fast_append_profile_in_gdf(geodataframe, raster_path, force_crs_match=True):
    """Append categorical zonal statistics (counts by pixel type) as columns
    to an input geodataframe.

    Parameters
    ----------
    geodataframe : geopandas.GeoDataFrame
        geodataframe that has overlay with the raster. If some polygons do
        not overlay the raster, consider a preprocessing step using the
        function subset_gdf_polygons_from_raster.
    raster_path : str
        path to the raster image.
    force_crs_match : bool, Default is True.
        Whether the Coordinate Reference System (CRS) of the polygons will be
        reprojected to the CRS of the raster file. It is recommended to leave
        this argument as True.

    Notes
    -----
    The generated geodataframe will input the value 0 for each Type that is
    not present in the raster for each polygon.
    """
    _check_presence_of_crs(geodataframe)
    if force_crs_match:
        with rio.open(raster_path) as raster:
            geodataframe = geodataframe.to_crs(crs=raster.crs.data)
    else:
        warnings.warn(
            "The GeoDataFrame is not being reprojected. The clipping might be "
            "performed on polygons that do not match the raster."
        )
    zonal_gjson = rs.zonal_stats(geodataframe, raster_path, prefix="Type_",
                                 geojson_out=True, categorical=True)
    zonal_ppt_gdf = gpd.GeoDataFrame.from_features(zonal_gjson)
    return zonal_ppt_gdf

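# `_check_presence_of_crs` is a small guard used by both variants above but not
# shown in this snippet. A minimal sketch of what it presumably checks:
def _check_presence_of_crs(geodataframe):
    """Raise if the GeoDataFrame has no CRS set, since reprojection would be undefined."""
    if geodataframe.crs is None:
        raise KeyError("The GeoDataFrame must have a defined CRS.")
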
def run_function(params):
    # Prepare the parameters
    subject_ref = params['subject']
    raster = params['raster']
    stats = params.get('stats', 'mean')
    geojson_out = params.get('geojson_out', True)
    try:
        subject = fiona.open(subject_ref)
        subject_types = get_subject_types(subject)
        if 'Polygon' in subject_types or 'MultiPolygon' in subject_types:
            features = zonal_stats(subject, raster, stats=stats,
                                   geojson_out=geojson_out)
        elif 'Point' in subject_types or 'MultiPoint' in subject_types:
            features = point_query(subject, raster, geojson_out=geojson_out)
        else:
            raise ValueError(
                "Input features need to be either all Polygons or Points. "
                "Doesn't look like they are."
            )
        # Return either a GeoJSON FeatureCollection or just the list of stats
        if geojson_out:
            return FeatureCollection(features)
        else:
            return features
    except ValueError as e:
        if str(e) == 'Specify either bounds or window':
            raise ValueError(
                "One or more features are lacking geometry or have null/empty values"
            )
        raise
    except AttributeError:
        raise ValueError(
            "Error calculating zonalstats. Please confirm every input "
            "geometry is valid, and contains coordinates")

def zonal_stats_csv(polygon, raster, headers, filename, year):
    stats = rasterstats.zonal_stats(
        polygon,
        raster,
        stats='sum',
        geojson_out=True
    )  # , stats='count min mean max median')
    # Write to CSV; UTF-8 encoding copes with odd string characters
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(','.join(headers))
        f.write('\n')
        for x in stats:
            p = x['properties']
            s = [str(p[h]) for h in headers]
            s[1] = s[1].replace(',', '')
            f.write(','.join(s))
            f.write('\n')

def calculate_stats(zones, geotiff, group_by=None, stats=DEFAULT_STATS,
                    prefix='stats_', geojson_out=False):
    """Calculate zonal stats, optionally grouping the zones first."""
    if group_by:
        zones = _group_zones(zones, group_by)
    stats = zonal_stats(zones, geotiff, stats=stats, prefix=prefix,
                        geojson_out=geojson_out)
    if not geojson_out:
        feature_properties = _extract_features_properties(zones)
        stats = [{**properties, **stat}
                 for stat, properties in zip(stats, feature_properties)]
    return stats

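# `_group_zones` and `_extract_features_properties` are module helpers not shown
# here. As an illustration only, a plausible `_extract_features_properties`,
# assuming `zones` is a path to a GeoJSON file:
import json

def _extract_features_properties(zones):
    # return one properties dict per feature, in file order
    with open(zones) as f:
        geojson = json.load(f)
    return [feature.get('properties', {}) for feature in geojson['features']]
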
def integrate_shapes(filename: str, shapefile: str, target: str) -> gpd.GeoDataFrame:
    """Open a GeoTIFF with the desired data, integrate it over polygons,
    and return the result as a GeoDataFrame.

    Parameters:
        filename: path to the GeoTIFF with the data
        shapefile: path to the shapefile with the polygons
        target: which polygon layer to return ("zonas" is supported)

    Returns:
        cuencas_gdf_ppn (GeoDataFrame): a geodataframe with cuencas and ppn
    """
    cuencas_gdf: gpd.GeoDataFrame = gpd.read_file(shapefile, encoding='utf-8')
    df_zs = pd.DataFrame(zonal_stats(shapefile, filename, all_touched=True))
    cuencas_gdf_ppn = pd.concat([cuencas_gdf, df_zs], axis=1).dropna(subset=['mean'])
    if target == "zonas":
        COLUM_REPLACE = {'Name': 'zona'}
        cuencas_gdf_ppn = cuencas_gdf_ppn.rename(columns=COLUM_REPLACE)
        return cuencas_gdf_ppn[['zona', 'geometry', 'mean']]
    return None

def get_zonal_stats(polygon, tif_file_path):
    '''
    Given a single polygon and the path to the corresponding tif file,
    calculates zonal stats for the shape outlined by the polygon and
    returns them as a dataframe.
    '''
    df = pd.DataFrame()
    for i in range(1, 4):
        stats = rasterstats.zonal_stats(
            polygon,
            tif_file_path,
            stats=['min', 'max', 'median', 'majority', 'sum'],
            band=i)
        df[i] = pd.Series(stats[0]['median'])
    df['roof'] = polygon['roof_material']
    df.set_index('roof', inplace=True)
    return df

def postprocess_CHM(df, lookup_pool):
    """Field measured height must be within min_diff meters of canopy model"""
    # Extract zonal stats
    try:
        CHM_path = find_sensor_path(lookup_pool=lookup_pool, bounds=df.total_bounds)
    except Exception as e:
        raise ValueError("Cannot find CHM path for {} from plot {} in lookup_pool: {}".format(
            df.total_bounds, df.plotID.unique(), e))
    draped_boxes = rasterstats.zonal_stats(df.geometry.__geo_interface__,
                                           CHM_path,
                                           add_stats={'q99': non_zero_99_quantile})
    df["CHM_height"] = [x["q99"] for x in draped_boxes]
    # if height is null, assign it from the canopy model
    df.height.fillna(df["CHM_height"], inplace=True)
    df = df[df.CHM_height > 1]
    # if remove:
    #     # drop points more than min_diff meters from the canopy model height
    #     df = df[(abs(df.height - df.CHM_height) < min_diff)]
    return df

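# `non_zero_99_quantile` is the custom stat handed to zonal_stats above but is
# not defined in this snippet. rasterstats passes each zone's masked array to
# add_stats callables, so a plausible sketch is:
import numpy as np

def non_zero_99_quantile(x):
    """99th percentile of the valid, non-zero pixels of a masked zone array."""
    vals = x.compressed()  # drop masked (nodata) pixels
    vals = vals[vals > 0]  # keep non-zero heights only
    return np.quantile(vals, 0.99) if vals.size else np.nan
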
def test_switzerland_rooftop_area():
    with open(PATH_TO_SONNENDACH_AREA_ESTIMATE, "r") as f_sonnendach_estimate:
        sonnendach_estimate = float(f_sonnendach_estimate.readline())
    with fiona.open(PATH_TO_NUTS.as_posix(), "r", layer="nuts0") as shapefile:
        switzerland = [
            feature["geometry"] for feature in shapefile
            if feature["properties"]["country_code"] == "CHE"
        ]
        assert len(switzerland) == 1
    with rasterio.open(PATH_TO_AREAS.as_posix()) as src:
        transform = src.transform
        areas = src.read(1)
    with rasterio.open(PATH_TO_CATEGORIES.as_posix()) as src:
        categories = src.read(1)
    areas[categories != Eligibility.ROOFTOP_PV] = 0
    zs = zonal_stats(switzerland, areas, affine=transform, stats="sum", nodata=-999)
    our_estimate = zs[0]["sum"]
    assert our_estimate == pytest.approx(sonnendach_estimate, 0.02)  # 2% tolerance

def zonal_to_shp(tifs_dir, shp_path):
    """
    Calculate zonal statistics for a list of tifs in a directory,
    using a countries shapefile.
    """
    tifs_glob_str = tifs_dir + '/*.tif'
    tifs = glob.glob(tifs_glob_str)
    ids = get_tif_ids(tifs_dir)
    geodataframe = gpd.GeoDataFrame.from_file(shp_path)
    for idx, tif in enumerate(tifs):
        stats = zonal_stats(shp_path, tif, nodata=255, stats='count mean')
        li_stats = []
        for i in range(len(stats)):
            try:
                the_mean = stats[i]['mean']
                the_count = stats[i]['count']
                the_total = the_mean * the_count
            except TypeError:
                # mean is None when the zone contains no valid pixels
                the_total = 255
            li_stats.append(the_total)
        s_stats = pd.Series(li_stats, name=ids[idx])
        geodataframe = geodataframe.join(s_stats)
    return geodataframe

def extract_features(image, image_label, segments, stats=["mean"], band_num=1, image_transform=None, nodata=-9999): """ Extract features by calculating raster statistics Inputs: image: nd array or str Image to retrieve statistics from, array or path to image file. image_label: str Label describing content of image. segments: geopandas GeoDataFrame or str Segments to retrieve statistics for, GeoDataFrame or path to segments file. stats: list (default=["mean"]) List of statistics to calculate. band_num: int (default=1) Image band number to use (counting from 1). image_transform: Affine or None (default=None) Transformation from pixel to geographic coordinates, only required if image is ndarray. nodata: float or None (default=-9999) Value to assign if no image data to retrieve. Outputs: image_label: str statistics: list of dicts """ print("{} - {} - Extracting features for {}".format( datetime.datetime.now(), os.getpid(), image_label)) return image_label, rs.zonal_stats(segments, image, stats=stats, affine=image_transform, band_num=band_num, nodata=nodata)
def main():
    print(datetime.now().time())
    # Read data
    zones = geopandas.read_file(zones_file)
    # zonal_stats does not work directly with a rasterio-opened file; it needs
    # the data array and the transformation, so read both into memory.
    raster = rasterio.open(raster_file)
    affine = raster.transform
    array = raster.read(1)
    results = zonal_stats(zones.geometry, array, affine=affine, stats=statistics)
    # Use this instead if the raster data should not be read into memory:
    # results = zonal_stats(zones.geometry, raster_file, stats=statistics)
    # Join the results back to the geopandas dataframe
    for stat in statistics:
        results_as_list = [d[stat] for d in results]
        zones[stat] = results_as_list
    # Write the results to file
    zones.to_file(zonal_file)

def labelPburns():
    print("Reading filepaths...")
    filepath_mask = Path("./results")
    fps_masks = sorted(list(filepath_mask.glob("predict*.tif")))
    print("Going through all shapefiles...")
    pathShapeFiles = Path("./data/shapefiles/trainingData")
    for i in range(len(fps_masks) - 2):
        nameID = fps_masks[i].name.split('_')[3].replace('.tif', '')
        fileShapes = sorted(list(pathShapeFiles.glob("*" + nameID + "*.shp")))
        print("reading raster")
        raster_src_mask, mask = readOneImg(fps_masks[i])
        print("reading shapefiles for", nameID)
        for fileShape in fileShapes:
            gdf = gpd.read_file(fileShape)
            gdfcrs = gdf.crs
            prediction_segs = zonal_stats(
                gdf['geometry'], mask,
                affine=raster_src_mask.meta['transform'],
                stats='majority', nodata=-999, all_touched=True)
            df = pd.DataFrame(prediction_segs)
            gdf['pred_burn'] = df.to_numpy()
            gdf = gdf.astype({"pred_burn": int})
            gdf = gpd.GeoDataFrame(gdf, crs=gdfcrs)
            newFileName = fileShape.parent / Path(
                fileShape.name.split('.shp')[0] + "_pburn.shp")
            print("writing to path...", newFileName)
            gdf.to_file(newFileName)

def add_zonal_fields(vector, raster, affine, prefix, band=0,
                     stats=['mean', 'min', 'max', 'std']):
    """Add zonal statistics as prefixed properties to an existing vector's features."""
    raster_stats = zonal_stats(vector, raster[band], stats=stats, affine=affine)
    for item in raster_stats:
        items = tuple(item.items())
        for k, v in items:
            # rename each stat key to '<prefix>_<stat>'
            item[prefix + "_" + k] = v
            item.pop(k)
    for v, rs in zip(vector, raster_stats):
        v['properties'] = OrderedDict(v['properties'], **rs)

def ZonalShapeDensity(shapegdf, zonegdf, cellsize, shapetoarray=None):
    # works only under the following situations; future upgrades to accommodate other units
    #   density unit: square mile
    #   input unit: meters
    """
    :param GeoDataFrame shapegdf: input shape features, either line or polygon features
    :param GeoDataFrame zonegdf: zone GeoDataFrame, units to collect statistics
    :param int cellsize: the cell size used to convert vector to raster
    :param obj shapetoarray: the numpy array to use, if a desired numpy array has already been created
    :return: a pandas series that contains the density value for each zone feature
    :rtype: Series
    """
    if shapetoarray is None:
        indata = ShapeToArray(shapegdf, cellsize)
    else:
        indata = shapetoarray
    # 907.9751 m is the radius of a circle with an area of one square mile, so
    # each zone's 'sum' collects shape cells within a one-square-mile neighborhood
    zonegdf = zonegdf.join(
        pd.DataFrame(zonal_stats(zonegdf.centroid.buffer(907.9751), indata[0],
                                 affine=indata[1], stats=['sum'], nodata=0),
                     index=zonegdf.index))
    # cell count * cell diagonal (sqrt(2) * cellsize) approximates length in
    # meters; dividing by 1609.344 m/mile yields miles per square mile
    return zonegdf['sum'] * 1.4142 * cellsize / 1609.344

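# A quick check of the magic numbers above (not part of the original module):
# the buffer radius reproduces one square mile, and 1609.344 m is one mile.
import math

SQ_MILE_M2 = 2589988.110336               # square meters in one square mile
radius = math.sqrt(SQ_MILE_M2 / math.pi)  # -> 907.9751... meters
assert abs(radius - 907.9751) < 1e-3
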
def get_zonal_stats_from_feature(
        self, feature: LinearRing) -> List[Optional[Dict]]:
    results = []
    if self.terrain_data:
        # Normalize feature coordinates to terrain resolution
        t_res = self.terrain_data.resolution
        normalized_coords = [(p[0] / t_res, p[1] / t_res) for p in feature.coords]
        for plugin in (x for x in (self.terrain_data, self.attribute_data,
                                   self.flow_dir_data, self.flow_dir_data)
                       if x is not None):
            if plugin is self.terrain_data:
                var = self._elevation_attribute.selected
            elif plugin is self.attribute_data:
                var = self._attribute.selected
            else:
                var = ''
            raster = plugin.get_data(var, Timeline.app().current)
            affine = plugin.affine
            var_stats = plugin.variable_stats(var)
            nodata = var_stats.nodata_value
            # Transform normalized raster coordinates to the CRS of the raster to query
            feat = Polygon(
                *[[transform.xy(affine, *p) for p in normalized_coords]])
            result = zonal_stats(feat, raster, affine=affine, nodata=nodata,
                                 add_stats=self.zonal_stats)[0]
            result['Name'] = plugin.data_name
            results.append(result)
    return results

def calc_stats(rx, ry, gdf, ndist, trsfm, tps):
    # Compute stats from the geodatabase (gdf). rx, ry = spatial resolution,
    # ndist = number of districts, trsfm = geometry (transform) of the
    # variable's matrix, tps = matrix of dates.
    # Empty matrices sized to the "time block" (len(tps) corresponds to axis 0
    # of tps) and to the number of districts/areas/countries (ndist).
    nb_px = np.zeros((len(tps), ndist))
    nb_px[:] = np.nan
    v_max = np.zeros((len(tps), ndist))
    v_max[:] = np.nan
    v_mean = np.zeros((len(tps), ndist))
    v_mean[:] = np.nan
    v_med = np.zeros((len(tps), ndist))
    v_med[:] = np.nan
    v_min = np.zeros((len(tps), ndist))
    v_min[:] = np.nan
    v_std = np.zeros((len(tps), ndist))
    v_std[:] = np.nan
    for i in range(len(tps)):
        # "micro-pixelization" to obtain a finer pseudo-resolution suited to
        # the district/area boundaries
        var1 = np.repeat(tps[i, ...], 100 * ry, axis=0)
        var2 = np.repeat(var1, 100 * rx, axis=1)
        val_input = np.ma.masked_array(var2, np.isnan(var2))
        # stat function from the rasterstats module
        stats = zonal_stats(
            gdf['geometry'], val_input, transform=trsfm,
            stats=['min', 'max', 'mean', 'count', 'std', 'median'])
        # load the stats into the geodataframe
        df = gdf.join(pd.DataFrame(stats))
        # load the stats into the output matrices
        nb_px[i, :] = np.array(df['count'])
        v_max[i, :] = np.array(df['max'])
        v_mean[i, :] = np.array(df['mean'])
        v_med[i, :] = np.array(df['median'])
        v_min[i, :] = np.array(df['min'])
        v_std[i, :] = np.array(df['std'])
    return nb_px, v_max, v_mean, v_med, v_min, v_std

def chop(self, time_var='time', value_var='scpdsi'):
    """Chop the raster stats over the years."""
    nc_var = self.nc_ds[value_var]
    logger.info(f'{len(nc_var)}')
    nc_times = self.nc_ds[time_var].values
    logger.info(f'Parsing {len(nc_times)} times')
    for nc_time in nc_times:
        logger.info(f'Parsing time {nc_time}')
        nc_arr = nc_var.sel(time=nc_time)
        nc_arr_values = nc_arr.values
        stats_data = zonal_stats(self.shape_file, nc_arr_values,
                                 affine=self.affine,
                                 stats=self.statistics,
                                 geojson_out=self.geojson,
                                 all_touched=self.all_touched)
        sd = pd.DataFrame.from_dict(stats_data)
        df = pd.DataFrame(self.shape_df)
        dat = pd.concat([df, sd], axis=1)
        logging.info(f'{nc_time}')
        dat['time'] = nc_time
        if self.geometry is False:
            dat.drop(columns='geometry', inplace=True, errors='ignore')
        self.df_list.append(dat)