def extract_xy(self, xys, return_array=False, progress=False):
    """Sample pixel values at an array of x,y locations.

    Parameters
    ----------
    xys : 2d array-like
        x and y coordinates from which to sample the raster, with shape
        (n_samples, 2).

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a single numpy masked array.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataFrame
        Containing extracted data as point geometries if
        `return_array=False`.

    numpy.ndarray
        2d masked array containing sampled raster values (sample, bands)
        at the x,y locations if `return_array=True`.
    """
    # The docstring promises array-like input, so coerce before relying on
    # ndarray-only features such as `.shape` and 2d slicing.
    xys = np.asarray(xys)

    # np.find_common_type was removed in NumPy 2.0. np.result_type applies
    # the standard promotion rules, so the buffer is at least float32 and
    # wide enough to hold every layer's dtype.
    dtype = np.result_type(np.float32, *self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    # The bar is only shown when `progress` is literally True.
    layers = tqdm(self.iloc, total=self.count, disable=progress is not True)

    # extract pixel values, one output column per layer
    for i, layer in enumerate(layers):
        sampler = sample_gen(
            dataset=layer.ds, xy=xys, indexes=layer.bidx, masked=True
        )
        values = np.ma.asarray(list(sampler))
        X[:, i] = values.flatten()

    # return as geopandas array as default (or numpy arrays)
    if return_array is False:
        gdf = pd.DataFrame(X, columns=self.names)
        gdf["geometry"] = [Point(xy) for xy in zip(xys[:, 0], xys[:, 1])]
        return gpd.GeoDataFrame(gdf, geometry="geometry", crs=self.crs)

    return X
def extract_raster(self, src, return_array=False, progress=False):
    """Sample a Raster object by an aligned raster of labelled pixels.

    Parameters
    ----------
    src : rasterio DatasetReader
        Single band raster containing labelled pixels as an open rasterio
        DatasetReader object.

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a tuple of numpy.ndarrays.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataFrame
        Geodataframe containing extracted data as point features if
        `return_array=False`. The labels are stored in the "value" column.

    tuple with three items if `return_array=True`
        - numpy.ndarray
            2d numpy masked array of extracted raster values
            (sample, bands).
        - numpy.ndarray
            1d numpy array of the labels of the sampled pixels.
        - numpy.ndarray
            2d numpy array of the x,y coordinates of the sampled pixels.
    """
    # open response raster and get labelled pixel indices and values
    arr = src.read(1, masked=True)

    # getmaskarray always yields a full boolean array, which guards against
    # `arr.mask` being the scalar `np.ma.nomask`.
    rows, cols = np.nonzero(~np.ma.getmaskarray(arr))
    xys = np.transpose(rasterio.transform.xy(src.transform, rows, cols))
    ys = arr.data[rows, cols]

    # np.find_common_type was removed in NumPy 2.0. np.result_type applies
    # the standard promotion rules, so the buffer is at least float32 and
    # wide enough to hold every layer's dtype.
    dtype = np.result_type(np.float32, *self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    # The bar is only shown when `progress` is literally True.
    layers = tqdm(self.iloc, total=self.count, disable=progress is not True)

    # extract Raster object values at the labelled pixel locations
    for i, layer in enumerate(layers):
        sampler = sample_gen(
            dataset=layer.ds, xy=xys, indexes=layer.bidx, masked=True
        )
        values = np.ma.asarray(list(sampler))
        X[:, i] = values.flatten()

    # summarize data
    if return_array is False:
        column_names = ["value"] + self.names
        gdf = pd.DataFrame(
            data=np.ma.column_stack((ys, X)), columns=column_names
        )
        gdf["geometry"] = [Point(xy) for xy in zip(xys[:, 0], xys[:, 1])]
        return gpd.GeoDataFrame(gdf, geometry="geometry", crs=self.crs)

    return X, ys, xys
def extract_vector(self, gdf, return_array=False, progress=False):
    """Sample a Raster/RasterLayer using a geopandas GeoDataframe
    containing points, lines or polygon features.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Containing either point, line or polygon geometries. All
        geometries must be of the same type. Overlapping geometries will
        cause the same pixels to be sampled.

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a tuple of numpy.ndarrays.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataFrame
        Containing extracted data as point geometries if
        `return_array=False`. The "id" column holds the index of the
        geometry in `gdf` that each sample came from.

    tuple
        A tuple (geodataframe index, extracted values, coordinates) of the
        extracted raster values as a masked array and the coordinates of
        the extracted pixels if `return_array=True`.

    Raises
    ------
    ValueError
        If `gdf` is not made up exclusively of Polygon, LineString or
        Point geometries.
    """
    geom_types = gdf.geom_type

    # rasterize polygon and line geometries
    if all(geom_types == "Polygon") or all(geom_types == "LineString"):
        nodata = -99999
        shapes = list(zip(gdf.geometry, gdf.index))

        # Pre-fill the output so every pixel not burned by a geometry keeps
        # the sentinel value.
        arr = np.ma.zeros((self.height, self.width))
        arr[:] = nodata
        arr = features.rasterize(
            shapes=shapes,
            fill=nodata,
            out=arr,
            transform=self.transform,
            all_touched=True,
        )

        # Locate the burned pixels once and reuse the indices for both the
        # geometry ids and the pixel coordinates.
        rows, cols = np.nonzero(arr != nodata)
        ids = arr[rows, cols].astype("int")
        xys = rasterio.transform.xy(
            transform=self.transform, rows=rows, cols=cols
        )
        xys = np.transpose(xys)

    elif all(geom_types == "Point"):
        ids = gdf.index.values
        # For point geometries the last two bounds columns (maxx, maxy)
        # are the point coordinates themselves.
        xys = gdf.bounds.iloc[:, 2:].values

    else:
        # Without this branch `xys` would be undefined and the failure
        # would surface later as an UnboundLocalError.
        raise ValueError(
            "gdf must contain only Polygon, only LineString or only Point "
            "geometries"
        )

    # np.find_common_type was removed in NumPy 2.0. np.result_type applies
    # the standard promotion rules, so the buffer is at least float32 and
    # wide enough to hold every layer's dtype.
    dtype = np.result_type(np.float32, *self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    # The bar is only shown when `progress` is literally True.
    layers = tqdm(self.iloc, total=self.count, disable=progress is not True)

    # extract raster pixels, one output column per layer
    for i, layer in enumerate(layers):
        sampler = sample_gen(
            dataset=layer.ds, xy=xys, indexes=layer.bidx, masked=True
        )
        values = np.ma.asarray(list(sampler))
        X[:, i] = values.flatten()

    # return as geopandas array as default (or numpy arrays)
    if return_array is False:
        df = pd.DataFrame(
            np.ma.column_stack((ids, X)), columns=["id"] + self.names
        )
        # column_stack promotes the ids to the common dtype; cast them back
        df["id"] = df["id"].astype("int")
        df["geometry"] = [Point(xy) for xy in zip(xys[:, 0], xys[:, 1])]
        return gpd.GeoDataFrame(df, geometry="geometry", crs=self.crs)

    return ids, X, xys
def multipliers(point_path, dst, country="conus",
                regions_path="/projects/rev/data/conus/reeds_regions.tif"):
    """Use the connection data frame to create the regional multipliers.

    Parameters
    ----------
    point_path : str
        Path to input supply curve point file.
    dst : str
        Path to output multiplier file.
    country : str
        String representation for the country the point path represents.
        So far only 'conus' is available.
    regions_path : str
        Path to the reeds regions geotiff that is sampled to assign a
        `reeds_demand_region` to each point. Defaults to the conus raster.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If merging the multiplier lookup changes the number of points.
    RuntimeError
        If any point is still missing a multiplier after the nearest
        neighbor fill.
    """
    coord_cols = ["eastings", "northings"]

    # Get supply curve points and multipliers
    pnts = pd.read_csv(point_path)
    mult_lkup = pd.read_csv(MULTIPLIER_PATHS[country])

    # Open the raster once for both the projection lookup and the sampling.
    with rio.open(regions_path) as fin:
        # Get the projected coordinates of the points to match the geotiff
        proj = Proj(fin.crs.to_proj4())
        eastings, northings = proj(pnts.longitude.values,
                                   pnts.latitude.values)
        pnts["eastings"] = eastings
        pnts["northings"] = northings

        # Get the reeds regions associated with each point. The generator
        # must be consumed while the dataset is still open.
        generator = sample_gen(fin, pnts[coord_cols].values)
        pnts["reeds_demand_region"] = [x[0] for x in generator]

    pnts_mults = pd.merge(pnts, mult_lkup, on="reeds_demand_region",
                          how="left")

    # Make sure the multiplier dimensions match the points. This is an
    # explicit check rather than `assert` so it still runs under `python -O`.
    if pnts_mults.shape[0] != pnts.shape[0]:
        raise ValueError(
            "Supply curve and multiplier point dimensions do not match."
        )

    # Find points with no multipliers and assign nearest neighbors
    missing = pd.isnull(pnts_mults.trans_multiplier)
    misses = pnts_mults[missing]
    hits = pnts_mults[~missing]

    # Skip the search when there is nothing to fill or nothing to fill
    # from; the check below reports any points left without a multiplier.
    if not misses.empty and not hits.empty:
        hits_tree = cKDTree(hits[coord_cols].values)
        _, idx = hits_tree.query(misses[coord_cols].values)
        nearests = hits.iloc[idx].trans_multiplier.values
        pnts_mults.loc[misses.index.values, "trans_multiplier"] = nearests

    n_missing = int(pd.isnull(pnts_mults.trans_multiplier).sum())
    if n_missing != 0:
        raise RuntimeError(
            "Nearest neighbor search for missing multipliers failed."
        )

    # Save
    cols = ["sc_point_gid", "trans_multiplier"]
    pnts_mults[cols].to_csv(dst)
def extract_vector(self, gdf, return_array=False, progress=False):
    """Sample a Raster/RasterLayer using a geopandas GeoDataframe
    containing points, lines or polygon features.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Containing either point, line or polygon geometries. All
        geometries must be of the same type. Overlapping geometries will
        cause the same pixels to be sampled.

    return_array : bool (opt), default=False
        By default the extracted pixel values are returned as a
        geopandas.GeoDataFrame. If `return_array=True` then the extracted
        pixel values are returned as a tuple of numpy.ndarrays.

    progress : bool (opt), default=False
        Show a progress bar for extraction.

    Returns
    -------
    geopandas.GeoDataFrame
        Containing extracted data as point geometries (one point per
        pixel) if `return_array=False`.

        The resulting GeoDataFrame is indexed using a named
        pandas.MultiIndex, with the `pixel_idx` level referring to the
        index of each pixel that was sampled, and the `geometry_idx` level
        referring to the index of each geometry in the supplied `gdf`.
        This makes it possible to keep track of how each sampled pixel
        relates to the original geometries, i.e. multiple pixels extracted
        within the area of a single polygon share the same
        `geometry_idx`.

        The extracted data can subsequently be joined with the attribute
        table of the supplied `gdf` using:

            training_py = geopandas.read_file(nc.polygons)
            df = self.stack.extract_vector(gdf=training_py)
            df = df.dropna()

            df = df.reset_index().merge(
                right=training_py.loc[:, ["id", "label"]],
                left_on="geometry_idx",
                right_index=True
            )

    tuple
        A tuple (geodataframe index, extracted values, coordinates) of the
        extracted raster values as a masked array and the coordinates of
        the extracted pixels if `return_array=True`.

    Raises
    ------
    ValueError
        If `gdf` is not made up exclusively of Polygon, LineString or
        Point geometries.
    """
    geom_types = gdf.geom_type

    # rasterize polygon and line geometries
    if all(geom_types == "Polygon") or all(geom_types == "LineString"):
        nodata = -99999
        shapes = list(zip(gdf.geometry, gdf.index))

        # Pre-fill the output so every pixel not burned by a geometry keeps
        # the sentinel value.
        arr = np.ma.zeros((self.height, self.width))
        arr[:] = nodata
        arr = features.rasterize(
            shapes=shapes,
            fill=nodata,
            out=arr,
            transform=self.transform,
            all_touched=True,
        )

        # Locate the burned pixels once and reuse the indices for both the
        # geometry ids and the pixel coordinates.
        rows, cols = np.nonzero(arr != nodata)
        ids = arr[rows, cols].astype("int")
        xys = rasterio.transform.xy(
            transform=self.transform, rows=rows, cols=cols
        )
        xys = np.transpose(xys)

    elif all(geom_types == "Point"):
        ids = gdf.index.values
        # For point geometries the last two bounds columns (maxx, maxy)
        # are the point coordinates themselves.
        xys = gdf.bounds.iloc[:, 2:].values

    else:
        # Without this branch `xys` would be undefined and the failure
        # would surface later as an UnboundLocalError.
        raise ValueError(
            "gdf must contain only Polygon, only LineString or only Point "
            "geometries"
        )

    # np.find_common_type was removed in NumPy 2.0. np.result_type applies
    # the standard promotion rules, so the buffer is at least float32 and
    # wide enough to hold every layer's dtype.
    dtype = np.result_type(np.float32, *self.dtypes)
    X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

    # The bar is only shown when `progress` is literally True.
    layers = tqdm(self.iloc, total=self.count, disable=progress is not True)

    # extract raster pixels, one output column per layer
    for i, layer in enumerate(layers):
        sampler = sample_gen(
            dataset=layer.ds, xy=xys, indexes=layer.bidx, masked=True
        )
        values = np.ma.asarray(list(sampler))
        X[:, i] = values.flatten()

    # return as geopandas array as default (or numpy arrays)
    if return_array is False:
        # Two-level index: running pixel number plus the originating
        # geometry's index in `gdf`.
        index = pd.MultiIndex.from_arrays(
            [np.arange(X.shape[0]), ids],
            names=["pixel_idx", "geometry_idx"],
        )
        df = pd.DataFrame(data=X, columns=self.names, index=index)
        df["geometry"] = [Point(xy) for xy in zip(xys[:, 0], xys[:, 1])]
        return gpd.GeoDataFrame(df, geometry="geometry", crs=self.crs)

    return ids, X, xys