class RasterShape(object):
    """
    Provide an object to make it more convenient to deal with a raster and a
    shapefile.

    Parameters
    ----------
    rds : RasterDS or string or QGIS raster layer
        If `rds` is a RasterDS, it is used as is. If not, try to make a
        RasterDS out of whatever `rds` is. RasterDS can take a filepath to a
        GDAL compatible raster (like a GeoTiff) or a QGIS raster layer.
    shp : GeoPandas.GeoDataFrame or string (filepath to a shapefile)
        If `shp` is a GeoDataFrame, that will be used. Otherwise `shp` will be
        assumed to be a filepath string and will be handed to
        GeoPandas.read_file().
    gdf_query : string or None
        A string for the pandas query method:
        http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.query.html
        Where a single geometry is used in a method: If `None` is passed, the
        first geometry in the GeoDataFrame will be used. If a query is passed,
        the first geometry in the query results will be used. The value given
        here is the default for methods that accept their own `gdf_query`.

    Attributes
    ----------
    rds : RasterDS
        The raster data source.
    gdf : GeoPandas.GeoDataFrame
        The vector data.
    gdf_query : string or None
        Default query used to pick a single geometry.
    """

    def __init__(self, rds, shp, gdf_query=None):
        # Types are recognised by class name (not isinstance); this is kept
        # exactly as the original did it.
        if type(rds).__name__ == 'RasterDS':
            self.rds = rds
        else:
            self.rds = RasterDS(rds)

        if type(shp).__name__ == 'GeoDataFrame':
            self.gdf = shp
        else:
            self.gdf = gpd.read_file(shp)

        # Previously this argument was accepted and then thrown away.
        self.gdf_query = gdf_query

    def geometry_subset(self, gdf_query=None, all_touched=False):
        """
        Subset the raster with a single geometry from the GeoDataFrame.

        Parameters
        ----------
        gdf_query : string or None
            pandas query string used to select rows of `self.gdf`. If `None`,
            the `gdf_query` given to the constructor is used; if that is also
            `None`, the first geometry in `self.gdf` is used.
        all_touched : bool
            Passed straight through to `self.rds.geometry_subset`.

        Returns
        -------
        Whatever `self.rds.geometry_subset` returns for the chosen geometry.

        Raises
        ------
        IndexError
            If the query (or the GeoDataFrame itself) contains no rows.
        """
        if gdf_query is None:
            gdf_query = self.gdf_query

        if gdf_query is None:
            rows = self.gdf
        else:
            rows = self.gdf.query(gdf_query)

        # `.iloc[0]` is positional, so this really is the *first* row even
        # when the index labels of a query result do not start at 0.
        geom = rows.iloc[0].geometry
        return self.rds.geometry_subset(geom, all_touched=all_touched)
class RasterShape(object):
    """
    Provide an object to make it more convenient to deal with a raster and a
    shapefile.

    Parameters
    ----------
    rds : RasterDS or string or QGIS raster layer
        If `rds` is a RasterDS, it is used as is. If not, try to make a
        RasterDS out of whatever `rds` is. RasterDS can take a filepath to a
        GDAL compatible raster (like a GeoTiff) or a QGIS raster layer.
    shp : GeoPandas.GeoDataFrame or string (filepath to a shapefile)
        If `shp` is a GeoDataFrame, that will be used. Otherwise `shp` will be
        assumed to be a filepath string and will be handed to
        GeoPandas.read_file().
    gdf_query : string or None
        A string for the pandas query method:
        http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.query.html
        Where a single geometry is used in a method: If `None` is passed, the
        first geometry in the GeoDataFrame will be used. If a query is passed,
        the first geometry in the query results will be used. The value given
        here is the default for methods that accept their own `gdf_query`.

    Attributes
    ----------
    rds : RasterDS
        The raster data source.
    gdf : GeoPandas.GeoDataFrame
        The vector data.
    gdf_query : string or None
        Default query used to pick a single geometry.
    """

    def __init__(self, rds, shp, gdf_query=None):
        # Types are recognised by class name (not isinstance); this is kept
        # exactly as the original did it.
        if type(rds).__name__ == 'RasterDS':
            self.rds = rds
        else:
            self.rds = RasterDS(rds)

        if type(shp).__name__ == 'GeoDataFrame':
            self.gdf = shp
        else:
            self.gdf = gpd.read_file(shp)

        # Previously this argument was accepted and then thrown away.
        self.gdf_query = gdf_query

    def geometry_subset(self, gdf_query=None, all_touched=False):
        """
        Subset the raster with a single geometry from the GeoDataFrame.

        Parameters
        ----------
        gdf_query : string or None
            pandas query string used to select rows of `self.gdf`. If `None`,
            the `gdf_query` given to the constructor is used; if that is also
            `None`, the first geometry in `self.gdf` is used.
        all_touched : bool
            Passed straight through to `self.rds.geometry_subset`.

        Returns
        -------
        Whatever `self.rds.geometry_subset` returns for the chosen geometry.

        Raises
        ------
        IndexError
            If the query (or the GeoDataFrame itself) contains no rows.
        """
        if gdf_query is None:
            gdf_query = self.gdf_query

        if gdf_query is None:
            rows = self.gdf
        else:
            rows = self.gdf.query(gdf_query)

        # `.iloc[0]` is positional, so this really is the *first* row even
        # when the index labels of a query result do not start at 0.
        geom = rows.iloc[0].geometry
        return self.rds.geometry_subset(geom, all_touched=all_touched)
class RasterShape(object):
    """
    Provide an object to make it more convenient to deal with a raster and a
    shapefile.

    Parameters
    ----------
    rds : RasterDS or string or QGIS raster layer
        If `rds` is a RasterDS, it is used as is. If not, try to make a
        RasterDS out of whatever `rds` is. RasterDS can take a filepath to a
        GDAL compatible raster (like a GeoTiff) or a QGIS raster layer.
    shp : GeoPandas.GeoDataFrame or string (filepath to a shapefile)
        If `shp` is a GeoDataFrame, that will be used. Otherwise `shp` will be
        assumed to be a filepath string and will be handed to
        GeoPandas.read_file().
    gdf_query : string or None
        A string for the pandas query method:
        http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.query.html
        Where a single geometry is used in a method: If `None` is passed, the
        first geometry in the GeoDataFrame will be used. If a query is passed,
        the first geometry in the query results will be used. The value given
        here is the default for methods that accept their own `gdf_query`.

    Attributes
    ----------
    rds : RasterDS
        The raster data source.
    gdf : GeoPandas.GeoDataFrame
        The vector data. Several methods add or overwrite a column of sampled
        raster values (`rast_col`) on this frame.
    gdf_query : string or None
        Default query used to pick a single geometry.
    """

    def __init__(self, rds, shp, gdf_query=None):
        # Types are recognised by class name (not isinstance); this is kept
        # exactly as the original did it.
        if type(rds).__name__ == 'RasterDS':
            self.rds = rds
        else:
            self.rds = RasterDS(rds)

        if type(shp).__name__ == 'GeoDataFrame':
            self.gdf = shp
        else:
            self.gdf = gpd.read_file(shp)

        # Previously this argument was accepted and then thrown away.
        self.gdf_query = gdf_query

    def geometry_subset(self, gdf_query=None, all_touched=False):
        """
        Subset the raster with a single geometry from the GeoDataFrame.

        Parameters
        ----------
        gdf_query : string or None
            pandas query string used to select rows of `self.gdf`. If `None`,
            the `gdf_query` given to the constructor is used; if that is also
            `None`, the first geometry in `self.gdf` is used.
        all_touched : bool
            Passed straight through to `self.rds.geometry_subset`.

        Returns
        -------
        Whatever `self.rds.geometry_subset` returns for the chosen geometry.

        Raises
        ------
        IndexError
            If the query (or the GeoDataFrame itself) contains no rows.
        """
        if gdf_query is None:
            gdf_query = self.gdf_query

        if gdf_query is None:
            rows = self.gdf
        else:
            rows = self.gdf.query(gdf_query)

        # `.iloc[0]` is positional, so this really is the *first* row even
        # when the index labels of a query result do not start at 0.
        geom = rows.iloc[0].geometry
        return self.rds.geometry_subset(geom, all_touched=all_touched)

    def rmse(self, rast_col='Raster Value', point_col='Point Value'):
        """
        Root mean square error between point values and sampled raster values.

        Side effect: `self.gdf[rast_col]` is (re)written with the result of
        `point_sample()`.

        Parameters
        ----------
        rast_col : string
            Column of `self.gdf` that receives the sampled raster values.
        point_col : string
            Existing column of `self.gdf` holding the reference values.

        Returns
        -------
        float
            sqrt(sum(err**2) / n) where n is the number of non-null errors.
        """
        df = self.gdf
        df[rast_col] = self.point_sample()
        errs = df[point_col] - df[rast_col]
        # pandas `sum` and `count` both skip NaN, so rows where the raster had
        # no data drop out of numerator and denominator alike.
        return np.sqrt(np.square(errs).sum() / float(errs.count()))

    def rsquared(self, rast_col='Raster Value', point_col='Point Value'):
        """
        Squared correlation between sampled raster values and point values.

        Side effect: `self.gdf[rast_col]` is (re)written with the result of
        `point_sample()`.

        Parameters
        ----------
        rast_col : string
            Column of `self.gdf` that receives the sampled raster values.
        point_col : string
            Existing column of `self.gdf` holding the reference values.

        Returns
        -------
        float
            The off-diagonal element of `DataFrame.corr()` for the two
            columns, squared.
        """
        df = self.gdf
        df[rast_col] = self.point_sample()
        # Positional lookup of the off-diagonal element of the 2x2 matrix.
        return df[[rast_col, point_col]].corr().iloc[0, 1] ** 2

    def point_sample(self):
        """
        Sample `self.rds` at every geometry in `self.gdf`.

        Each geometry is handed to `self.rds.value_at_point`, so `self.gdf` is
        expected to hold point geometries. Values equal to
        `self.rds.band_array.fill_value` are replaced with NaN.

        Returns
        -------
        The series produced by `self.gdf.geometry.apply`, with fill values
        replaced by `np.nan`.
        """
        gser = self.gdf.geometry.apply(lambda p: self.rds.value_at_point(p))
        gser[gser == self.rds.band_array.fill_value] = np.nan
        return gser

    def seaborn_jointplot(self, rast_col='Raster Value',
                          point_col='Point Value'):
        """
        Draw a seaborn regression joint plot of raster vs. point values.

        Side effect: `self.gdf[rast_col]` is (re)written with the result of
        `point_sample()`.

        Returns
        -------
        Whatever `seaborn.jointplot` returns.
        """
        # Imported here so seaborn is only required when this plot is used.
        import seaborn as sns

        df = self.gdf
        df[rast_col] = self.point_sample()

        def r2(x, y):
            return stats.pearsonr(x, y)[0] ** 2

        # NOTE(review): positional x/y and `stat_func` are the call style of
        # the seaborn version this was written against -- confirm against the
        # installed seaborn before relying on this method.
        fig = sns.jointplot(rast_col, point_col, data=df, kind='reg',
                            stat_func=r2)
        return fig

    def hexbin_plot(self, rast_col='Raster Value', point_col='Point Value'):
        """
        Draw a hexbin plot of point values (x) against raster values (y).

        A 1:1 line spanning the range of the point values is overlaid, and the
        title reports RMSE and R^2 as computed by `rmse` and `rsquared`.

        Side effect: `self.gdf[rast_col]` is (re)written with the result of
        `point_sample()`.

        Returns
        -------
        matplotlib figure
            The figure created by `plt.subplots`.
        """
        df = self.gdf
        df[rast_col] = self.point_sample()

        fig, ax = plt.subplots(1, 1)
        ax.hexbin(df[point_col], df[rast_col], mincnt=1, bins=None,
                  gridsize=500, cmap=plt.cm.jet)
        ax.set_xlabel(point_col)
        ax.set_ylabel(rast_col)
        ax.set_aspect('equal')

        # 1:1 reference line.
        dmin = df[point_col].min()
        dmax = df[point_col].max()
        ax.plot([dmin, dmax], [dmin, dmax], c='black', alpha=0.6)

        ax.set_title(r"RMSE: {:.2f}, $R^2$: {:.2f}".format(
            self.rmse(rast_col, point_col),
            self.rsquared(rast_col, point_col)))
        return fig
class ParameterEstimator(RasterShape):
    """
    An object to simplify the process of estimating some apparent optical
    properties (AOPs). Most importantly, the diffuse attenuation coefficient
    (K).

    Parameters
    ----------
    img_rds : RasterDS or anything RasterDS accepts
        The multispectral image. Used as is when it is a RasterDS, otherwise
        handed to `RasterDS()`.
    depth_rds : RasterDS or anything RasterDS accepts
        The depth raster. Same handling as `img_rds`.
    sand_shp : GeoPandas.GeoDataFrame or string
        Used as is when it is a GeoDataFrame, otherwise handed to
        `gpd.read_file()`.
    gdf_query : string or None
        pandas query string used by `geometry` to pick a single geometry. If
        `None`, the first geometry in `sand_shp` is used.
    depth_range : sequence of two numbers or None
        If given, depths outside this range are masked (the two values are
        unpacked into `np.ma.masked_outside`).
    surface_refraction : bool
        If true, `surface_refraction_correction` is applied to the image
        arrays.
    surface_reflectance : bool, ndarray, list or tuple
        `False` for no correction, `True` to call
        `surface_reflectance_correction` with its defaults, or an array-like
        that is passed to it as the second argument.

    Attributes
    ----------
    image_subset_array, depth_subset_array
        Image and depth arrays restricted to `geometry`, after their masks
        have been passed through `equalize_array_masks`.
    full_image_array
        The full image band array with the same corrections applied.

    Notes
    -----
    `RasterShape.__init__` is not called, so `self.rds` is never set on
    instances of this class.
    """

    def __init__(self, img_rds, depth_rds, sand_shp, gdf_query=None,
                 depth_range=None, surface_refraction=False,
                 surface_reflectance=False):
        self.surf_reflectance = surface_reflectance
        self.surf_refraction = surface_refraction
        self.depth_range = depth_range

        # Types are recognised by class name (not isinstance); kept as in the
        # original.
        if type(img_rds).__name__ == 'RasterDS':
            self.img_rds = img_rds
        else:
            self.img_rds = RasterDS(img_rds)

        if type(depth_rds).__name__ == 'RasterDS':
            self.depth_rds = depth_rds
        else:
            self.depth_rds = RasterDS(depth_rds)

        if type(sand_shp).__name__ == 'GeoDataFrame':
            self.gdf = sand_shp
        else:
            self.gdf = gpd.read_file(sand_shp)

        self.gdf_query = gdf_query
        self._set_arrays()

    @staticmethod
    def _is_unchanged(value):
        """Return True if `value` is the "unchanged" placeholder string."""
        # The isinstance guard keeps array arguments from being compared
        # element-wise against the string.
        return isinstance(value, str) and value == "unchanged"

    def copy(self, gdf_query="unchanged", depth_range="unchanged",
             surface_refraction="unchanged", surface_reflectance="unchanged"):
        """
        Build a new ParameterEstimator from this one's rasters and
        GeoDataFrame, optionally overriding some settings.

        Any argument left at the string "unchanged" takes its value from this
        instance. (A string placeholder is used because `None` and `False`
        are themselves meaningful values for these settings.)

        Returns
        -------
        ParameterEstimator
        """
        if self._is_unchanged(gdf_query):
            gdf_query = self.gdf_query
        if self._is_unchanged(depth_range):
            depth_range = self.depth_range
        if self._is_unchanged(surface_refraction):
            surface_refraction = self.surf_refraction
        if self._is_unchanged(surface_reflectance):
            surface_reflectance = self.surf_reflectance
        return ParameterEstimator(self.img_rds, self.depth_rds, self.gdf,
                                  gdf_query, depth_range, surface_refraction,
                                  surface_reflectance)

    @property
    def _unequal_image_subset(self):
        """
        The image array masked outside of the geometry. The mask on this array
        may not match the mask on the depth array.
        """
        return self.img_rds.geometry_subset(self.geometry)

    @property
    def _unequal_depth_subset(self):
        """
        The depth array masked outside of the geometry and, if `depth_range`
        is set, outside of that range. The mask on this array may not match
        the mask on the image array.
        """
        darr = self.depth_rds.geometry_subset(self.geometry).squeeze()
        if self.depth_range is not None:
            darr = np.ma.masked_outside(darr, *self.depth_range)
        return darr

    def set_depth_range(self, depth_range):
        """Set `depth_range` and rebuild the subset arrays."""
        self.depth_range = depth_range
        self._set_arrays()

    def _set_arrays(self):
        """
        (Re)compute `image_subset_array`, `full_image_array` and
        `depth_subset_array` from the current settings.

        Returns
        -------
        bool
            Always `True`.

        Raises
        ------
        TypeError
            If `self.surf_reflectance` is truthy but is neither `True` nor an
            ndarray, list or tuple.
        """
        imarr, darr = equalize_array_masks(self._unequal_image_subset,
                                           self._unequal_depth_subset)
        fullim = self.img_rds.band_array

        if self.surf_refraction:
            imarr = surface_refraction_correction(imarr)
            fullim = surface_refraction_correction(fullim)

        refl = self.surf_reflectance
        # Array-likes are handled before any truth test: `bool()` of a
        # multi-element ndarray raises ValueError.
        if isinstance(refl, (np.ndarray, list, tuple)):
            if np.size(refl) > 0:
                imarr = surface_reflectance_correction(imarr, refl)
                fullim = surface_reflectance_correction(fullim, refl)
        elif refl:
            # Equality rather than identity so that 1 and numpy.bool_ True
            # keep working as they did before.
            if refl == True:  # noqa: E712
                imarr = surface_reflectance_correction(imarr)
                fullim = surface_reflectance_correction(fullim)
            else:
                raise TypeError(
                    "If surface_reflectance doesn't evaluate to False, then "
                    "it should be True, an ndarray, a list, or a tuple.")

        self.image_subset_array = imarr
        self.full_image_array = fullim
        self.depth_subset_array = darr.squeeze()
        return True

    def same_resolution(self, print_res=False):
        """
        Check whether the image and depth rasters have the same pixel size.

        Only elements 1 and 5 of each GDAL geotransform (pixel width and pixel
        height) are compared; origin and rotation terms are ignored.

        Parameters
        ----------
        print_res : bool
            If true, print both pixel-size pairs.

        Returns
        -------
        bool
            Result of `np.allclose` on the two pixel-size pairs.
        """
        gt1 = np.array(self.img_rds.gdal_ds.GetGeoTransform())[[1, 5]]
        gt2 = np.array(self.depth_rds.gdal_ds.GetGeoTransform())[[1, 5]]
        if print_res:
            # Single pre-formatted argument: prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("{} {}".format(gt1, gt2))
        return np.allclose(gt1, gt2)

    @property
    def geometry(self):
        """
        Return a single geometry from `self.gdf` (the GeoDataFrame
        representation of `sand_shp`). If `gdf_query` has not been set, the
        geometry returned will just be the first geometry in `sand_shp`. If
        `gdf_query` has been set, the returned geometry will be the first one
        returned by that query.

        Returns
        -------
        shapely.geometry
            A shapely (https://pypi.python.org/pypi/Shapely) geometry object.

        Raises
        ------
        IndexError
            If there are no rows to choose from.
        """
        if self.gdf_query is None:
            rows = self.gdf
        else:
            rows = self.gdf.query(self.gdf_query)
        # Positional, so it is the first row regardless of index labels.
        return rows.iloc[0].geometry

    def deep_water_means(self, p=10, win_size=3, win_percentage=50):
        """
        This is really the darkest pixel means based on brightness. In some
        cases this may select shallow water over a dark bottom rather than
        selecting deep water. If you're using this for the Lyzenga log
        transformation, you probably don't want that. For more information see
        the docstrings in `OpticalRS.Lyzenga2006`. You can use
        `dark_pixel_array` to figure out which pixels are actually being
        selected.

        Parameters are passed straight through to `dark_pixel_array`.

        Returns
        -------
        The `.data` of the per-band mean (mean over axis 0 after collapsing
        all but the last axis) of the array returned by `dark_pixel_array`.
        """
        dpa = dark_pixel_array(self.full_image_array, p=p, win_size=win_size,
                               win_percentage=win_percentage)
        band_means = dpa.reshape(-1, dpa.shape[-1]).mean(0)
        return band_means.data

    def dark_percentile(self, p=1):
        """
        This is a simpler alternative (of sorts) to `deep_water_means`. Rather
        than using a version of Lyzenga's criteria to select the pixels, this
        method just gives the `p`th percentile of each band. This method
        assumes that land has been masked from the image (shadows on land can
        be darker than water and that could throw off the returns).

        If you subtract these values from the image bands and curve fit,
        you'll get `Rinf` values close to zero. This is like having the deep
        water as completely black. The left over signal might approximate just
        what's reflected from the bottom. ...or not. This is just something I
        messed around with a bit.

        Returns
        -------
        The result of `band_percentiles`, cast to float32.
        """
        # I had problems trying to subtract these values as type float64 from
        # a float32 image so I'm casting them to float32 here.
        return band_percentiles(self.full_image_array, p=p).astype(np.float32)

    def _log_image_subset(self, deep_water_means=None):
        """Return log(image_subset_array - deep water means) as a masked array."""
        if deep_water_means is None:
            dwm = self.deep_water_means()
        else:
            dwm = deep_water_means
        # np.ma.log masks values where the logarithm is undefined.
        return np.ma.log(self.image_subset_array - dwm)

    def linear_parameters(self, deep_water_means=None, geometric_factor=2.0):
        """
        Estimate attenuation from a regression of log-transformed radiance on
        depth.

        Parameters
        ----------
        deep_water_means : array-like or None
            Per-band values subtracted from the image before the log
            transform. If `None`, `self.deep_water_means()` is used.
        geometric_factor : float
            Divisor used to turn `Kg` into `K`.

        Returns
        -------
        pandas.DataFrame
            Indexed by the first `nbands` entries of `wv2_center_wavelength`,
            with columns `Kg` (negated first element of the `regressions`
            result), `K` (`Kg / geometric_factor`) and `nPix` (unmasked pixel
            count per band).
        """
        X = self._log_image_subset(deep_water_means)
        Xdepth = self.depth_subset_array
        params = regressions(Xdepth, X)
        Kg_arr = -1 * params[0]
        n_pix = X.reshape(-1, X.shape[-1]).count(0)
        nbands = np.atleast_3d(X).shape[-1]
        pardf = pd.DataFrame(Kg_arr, columns=["Kg"],
                             index=wv2_center_wavelength[:nbands])
        pardf['K'] = pardf.Kg / geometric_factor
        pardf['nPix'] = n_pix
        return pardf

    def linear_fit_plot(self, deep_water_means=None, visible_only=True):
        """
        Plot the depth vs. log-transformed radiance regression.

        Parameters
        ----------
        deep_water_means : array-like or None
            As for `linear_parameters`.
        visible_only : bool
            Passed through to `regression_plot`.

        Returns
        -------
        Whatever `regression_plot` returns.
        """
        X = self._log_image_subset(deep_water_means)
        Xdepth = self.depth_subset_array
        return regression_plot(Xdepth, X, visible_only=visible_only)

    def curve_fit_parameters(self, geometric_factor=2.0):
        """Return `param_df` for the depth and image subset arrays."""
        return param_df(self.depth_subset_array, self.image_subset_array,
                        geometric_factor=geometric_factor)

    def curve_fit_plots(self, params=None, plot_params=True,
                        ylabel='Reflectance', visible_only=True):
        """Return `albedo_parameter_plots` for the image and depth subsets."""
        return albedo_parameter_plots(self.image_subset_array,
                                      self.depth_subset_array, params=params,
                                      plot_params=plot_params, ylabel=ylabel,
                                      visible_only=visible_only)

    def K_comparison_plot(self, paramdf, columns='K',
                          figure_title="$K$ Estimates vs. $K$ Values from Jerlov"):
        """Return `jerlov_Kd_plot` for the given parameter DataFrame."""
        return jerlov_Kd_plot(paramdf, columns, figure_title)
class ParameterEstimator(RasterShape):
    """
    An object to simplify the process of estimating some apparent optical
    properties (AOPs). Most importantly, the diffuse attenuation coefficient
    (K).

    Parameters
    ----------
    img_rds : RasterDS or anything RasterDS accepts
        The multispectral image. Used as is when it is a RasterDS, otherwise
        handed to `RasterDS()`.
    depth_rds : RasterDS or anything RasterDS accepts
        The depth raster. Same handling as `img_rds`.
    sand_shp : GeoPandas.GeoDataFrame or string
        Used as is when it is a GeoDataFrame, otherwise handed to
        `gpd.read_file()`.
    gdf_query : string or None
        pandas query string used by `geometry` to pick a single geometry. If
        `None`, the first geometry in `sand_shp` is used.
    depth_range : sequence of two numbers or None
        If given, depths outside this range are masked (the two values are
        unpacked into `np.ma.masked_outside`).
    surface_refraction : bool
        If true, `surface_refraction_correction` is applied to the image
        arrays.
    surface_reflectance : bool, ndarray, list or tuple
        `False` for no correction, `True` to call
        `surface_reflectance_correction` with its defaults, or an array-like
        that is passed to it as the second argument.

    Attributes
    ----------
    image_subset_array, depth_subset_array
        Image and depth arrays restricted to `geometry`, after their masks
        have been passed through `equalize_array_masks`.
    full_image_array
        The full image band array with the same corrections applied.

    Notes
    -----
    `RasterShape.__init__` is not called, so `self.rds` is never set on
    instances of this class.
    """

    def __init__(self, img_rds, depth_rds, sand_shp, gdf_query=None,
                 depth_range=None, surface_refraction=False,
                 surface_reflectance=False):
        self.surf_reflectance = surface_reflectance
        self.surf_refraction = surface_refraction
        self.depth_range = depth_range

        # Types are recognised by class name (not isinstance); kept as in the
        # original.
        if type(img_rds).__name__ == 'RasterDS':
            self.img_rds = img_rds
        else:
            self.img_rds = RasterDS(img_rds)

        if type(depth_rds).__name__ == 'RasterDS':
            self.depth_rds = depth_rds
        else:
            self.depth_rds = RasterDS(depth_rds)

        if type(sand_shp).__name__ == 'GeoDataFrame':
            self.gdf = sand_shp
        else:
            self.gdf = gpd.read_file(sand_shp)

        self.gdf_query = gdf_query
        self._set_arrays()

    @staticmethod
    def _is_unchanged(value):
        """Return True if `value` is the "unchanged" placeholder string."""
        # The isinstance guard keeps array arguments from being compared
        # element-wise against the string.
        return isinstance(value, str) and value == "unchanged"

    def copy(self, gdf_query="unchanged", depth_range="unchanged",
             surface_refraction="unchanged", surface_reflectance="unchanged"):
        """
        Build a new ParameterEstimator from this one's rasters and
        GeoDataFrame, optionally overriding some settings.

        Any argument left at the string "unchanged" takes its value from this
        instance. (A string placeholder is used because `None` and `False`
        are themselves meaningful values for these settings.)

        Returns
        -------
        ParameterEstimator
        """
        if self._is_unchanged(gdf_query):
            gdf_query = self.gdf_query
        if self._is_unchanged(depth_range):
            depth_range = self.depth_range
        if self._is_unchanged(surface_refraction):
            surface_refraction = self.surf_refraction
        if self._is_unchanged(surface_reflectance):
            surface_reflectance = self.surf_reflectance
        return ParameterEstimator(self.img_rds, self.depth_rds, self.gdf,
                                  gdf_query, depth_range, surface_refraction,
                                  surface_reflectance)

    @property
    def _unequal_image_subset(self):
        """
        The image array masked outside of the geometry. The mask on this array
        may not match the mask on the depth array.
        """
        return self.img_rds.geometry_subset(self.geometry)

    @property
    def _unequal_depth_subset(self):
        """
        The depth array masked outside of the geometry and, if `depth_range`
        is set, outside of that range. The mask on this array may not match
        the mask on the image array.
        """
        darr = self.depth_rds.geometry_subset(self.geometry).squeeze()
        if self.depth_range is not None:
            darr = np.ma.masked_outside(darr, *self.depth_range)
        return darr

    def set_depth_range(self, depth_range):
        """Set `depth_range` and rebuild the subset arrays."""
        self.depth_range = depth_range
        self._set_arrays()

    def _set_arrays(self):
        """
        (Re)compute `image_subset_array`, `full_image_array` and
        `depth_subset_array` from the current settings.

        Returns
        -------
        bool
            Always `True`.

        Raises
        ------
        TypeError
            If `self.surf_reflectance` is truthy but is neither `True` nor an
            ndarray, list or tuple.
        """
        imarr, darr = equalize_array_masks(self._unequal_image_subset,
                                           self._unequal_depth_subset)
        fullim = self.img_rds.band_array

        if self.surf_refraction:
            imarr = surface_refraction_correction(imarr)
            fullim = surface_refraction_correction(fullim)

        refl = self.surf_reflectance
        # Array-likes are handled before any truth test: `bool()` of a
        # multi-element ndarray raises ValueError.
        if isinstance(refl, (np.ndarray, list, tuple)):
            if np.size(refl) > 0:
                imarr = surface_reflectance_correction(imarr, refl)
                fullim = surface_reflectance_correction(fullim, refl)
        elif refl:
            # Equality rather than identity so that 1 and numpy.bool_ True
            # keep working as they did before.
            if refl == True:  # noqa: E712
                imarr = surface_reflectance_correction(imarr)
                fullim = surface_reflectance_correction(fullim)
            else:
                raise TypeError(
                    "If surface_reflectance doesn't evaluate to False, then "
                    "it should be True, an ndarray, a list, or a tuple.")

        self.image_subset_array = imarr
        self.full_image_array = fullim
        self.depth_subset_array = darr.squeeze()
        return True

    def same_resolution(self, print_res=False):
        """
        Check whether the image and depth rasters have the same pixel size.

        Only elements 1 and 5 of each GDAL geotransform (pixel width and pixel
        height) are compared; origin and rotation terms are ignored.

        Parameters
        ----------
        print_res : bool
            If true, print both pixel-size pairs.

        Returns
        -------
        bool
            Result of `np.allclose` on the two pixel-size pairs.
        """
        gt1 = np.array(self.img_rds.gdal_ds.GetGeoTransform())[[1, 5]]
        gt2 = np.array(self.depth_rds.gdal_ds.GetGeoTransform())[[1, 5]]
        if print_res:
            # Single pre-formatted argument: prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("{} {}".format(gt1, gt2))
        return np.allclose(gt1, gt2)

    @property
    def geometry(self):
        """
        Return a single geometry from `self.gdf` (the GeoDataFrame
        representation of `sand_shp`). If `gdf_query` has not been set, the
        geometry returned will just be the first geometry in `sand_shp`. If
        `gdf_query` has been set, the returned geometry will be the first one
        returned by that query.

        Returns
        -------
        shapely.geometry
            A shapely (https://pypi.python.org/pypi/Shapely) geometry object.

        Raises
        ------
        IndexError
            If there are no rows to choose from.
        """
        if self.gdf_query is None:
            rows = self.gdf
        else:
            # Must go through `self.gdf`; there is no bare `gdf` name in
            # scope here.
            rows = self.gdf.query(self.gdf_query)
        # Positional, so it is the first row regardless of index labels.
        return rows.iloc[0].geometry

    def deep_water_means(self, p=10, win_size=3, win_percentage=50):
        """
        This is really the darkest pixel means based on brightness. In some
        cases this may select shallow water over a dark bottom rather than
        selecting deep water. If you're using this for the Lyzenga log
        transformation, you probably don't want that. For more information see
        the docstrings in `OpticalRS.Lyzenga2006`. You can use
        `dark_pixel_array` to figure out which pixels are actually being
        selected.

        Parameters are passed straight through to `dark_pixel_array`.

        Returns
        -------
        The `.data` of the per-band mean (mean over axis 0 after collapsing
        all but the last axis) of the array returned by `dark_pixel_array`.
        """
        dpa = dark_pixel_array(self.full_image_array, p=p, win_size=win_size,
                               win_percentage=win_percentage)
        band_means = dpa.reshape(-1, dpa.shape[-1]).mean(0)
        return band_means.data

    def dark_percentile(self, p=1):
        """
        This is a simpler alternative (of sorts) to `deep_water_means`. Rather
        than using a version of Lyzenga's criteria to select the pixels, this
        method just gives the `p`th percentile of each band. This method
        assumes that land has been masked from the image (shadows on land can
        be darker than water and that could throw off the returns).

        If you subtract these values from the image bands and curve fit,
        you'll get `Rinf` values close to zero. This is like having the deep
        water as completely black. The left over signal might approximate just
        what's reflected from the bottom. ...or not. This is just something I
        messed around with a bit.

        Returns
        -------
        The result of `band_percentiles`, cast to float32.
        """
        # I had problems trying to subtract these values as type float64 from
        # a float32 image so I'm casting them to float32 here.
        return band_percentiles(self.full_image_array, p=p).astype(np.float32)

    def _log_image_subset(self, deep_water_means=None):
        """Return log(image_subset_array - deep water means) as a masked array."""
        if deep_water_means is None:
            dwm = self.deep_water_means()
        else:
            dwm = deep_water_means
        # np.ma.log masks values where the logarithm is undefined.
        return np.ma.log(self.image_subset_array - dwm)

    def linear_parameters(self, deep_water_means=None, geometric_factor=2.0):
        """
        Estimate attenuation from a regression of log-transformed radiance on
        depth.

        Parameters
        ----------
        deep_water_means : array-like or None
            Per-band values subtracted from the image before the log
            transform. If `None`, `self.deep_water_means()` is used.
        geometric_factor : float
            Divisor used to turn `Kg` into `K`.

        Returns
        -------
        pandas.DataFrame
            Indexed by the first `nbands` entries of `wv2_center_wavelength`,
            with columns `Kg` (negated first element of the `regressions`
            result), `K` (`Kg / geometric_factor`) and `nPix` (unmasked pixel
            count per band).
        """
        X = self._log_image_subset(deep_water_means)
        Xdepth = self.depth_subset_array
        params = regressions(Xdepth, X)
        Kg_arr = -1 * params[0]
        n_pix = X.reshape(-1, X.shape[-1]).count(0)
        nbands = np.atleast_3d(X).shape[-1]
        pardf = pd.DataFrame(Kg_arr, columns=["Kg"],
                             index=wv2_center_wavelength[:nbands])
        pardf['K'] = pardf.Kg / geometric_factor
        pardf['nPix'] = n_pix
        return pardf

    def linear_fit_plot(self, deep_water_means=None, visible_only=True):
        """
        Plot the depth vs. log-transformed radiance regression.

        Parameters
        ----------
        deep_water_means : array-like or None
            As for `linear_parameters`.
        visible_only : bool
            Passed through to `regression_plot`.

        Returns
        -------
        Whatever `regression_plot` returns.
        """
        X = self._log_image_subset(deep_water_means)
        Xdepth = self.depth_subset_array
        return regression_plot(Xdepth, X, visible_only=visible_only)

    def curve_fit_parameters(self, geometric_factor=2.0):
        """Return `param_df` for the depth and image subset arrays."""
        return param_df(self.depth_subset_array, self.image_subset_array,
                        geometric_factor=geometric_factor)

    def curve_fit_plots(self, params=None, plot_params=True,
                        ylabel='Reflectance', visible_only=True):
        """Return `albedo_parameter_plots` for the image and depth subsets."""
        return albedo_parameter_plots(self.image_subset_array,
                                      self.depth_subset_array, params=params,
                                      plot_params=plot_params, ylabel=ylabel,
                                      visible_only=visible_only)

    def K_comparison_plot(self, paramdf, columns='K',
                          figure_title="$K$ Estimates vs. $K$ Values from Jerlov"):
        """Return `jerlov_Kd_plot` for the given parameter DataFrame."""
        return jerlov_Kd_plot(paramdf, columns, figure_title)