def test_vrt_src_kept_alive(path_rgb_byte_tif):
    """VRT source dataset is kept alive, preventing crashes.

    The VRT is built while the source dataset is open and is read only
    after the source's ``with`` block has exited.
    """
    with rasterio.open(path_rgb_byte_tif) as dst:
        vrt = WarpedVRT(dst, crs="EPSG:3857")

    # Read after the source context has exited. Close the VRT even when the
    # assertion fails so a failing run does not leak the handle.
    try:
        assert (vrt.read() != 0).any()
    finally:
        vrt.close()
def flow_from_dataframe(self, dataframe, width=0, height=0, batch_size=0):
    """Yield batches of samples read from the source at the sample extents.

    This is a generator: argument validation and all dataset work happen
    when iteration starts, not when the method is called.

    Args:
        dataframe (geodataframe): dataframe with spatial extents
        width (int): sample array width; ``self.width`` is used when 0
        height (int): sample array height; ``self.height`` is used when 0
        batch_size (int): number of geometries per batch;
            ``self.batch_size`` is used when 0

    Yields:
        The value returned by ``self.get_batch`` for each consecutive
        slice of ``batch_size`` geometries (originally documented as
        ``Iterator[ndarray]``).

    Raises:
        ValueError: if no positive sample size or batch size is available.
    """
    import math

    width = width if width else self.width
    height = height if height else self.height
    if width < 1 or height < 1:
        raise ValueError('desired sample size must be set')

    batch_size = batch_size if batch_size else self.batch_size
    if batch_size < 1:
        raise ValueError('batch size must be specified')

    # TODO should reprojection be handled here or externally?
    # TODO Is there an equivalency check for projections?
    # (previously: dataframe.to_crs(self.crs) if self.crs else dataframe)
    df = dataframe

    # Nothing to read; also avoids indexing the first row of an empty frame.
    if len(df) == 0:
        return

    # TODO the resolution is derived from the first sample only, i.e. all
    # samples are assumed to be uniformly sized. Averaging over df.bounds
    # was tried before and found to be very slow.
    minx, miny, maxx, maxy = df.iloc[0].geometry.bounds
    xres, yres = (maxx - minx) / width, (maxy - miny) / height

    # Size the virtual raster so it spans the extent of every sample.
    minx, miny, maxx, maxy = df.total_bounds
    # Pixel counts must be integers. Round first so floating-point noise
    # (e.g. 256.0000000001) does not add a spurious pixel, then round up so
    # the far edge of the extent is never cut off.
    vrt_width = math.ceil(round((maxx - minx) / xres, 6))
    vrt_height = math.ceil(round((maxy - miny) / yres, 6))
    transform = rasterio.transform.from_origin(minx, maxy, xres, yres)

    # use VRT to ensure correct projection and size
    vrt = WarpedVRT(self.src, crs=df.crs,
                    width=vrt_width, height=vrt_height,
                    transform=transform,
                    resampling=self.resampling)
    try:
        for i in range(0, len(df), batch_size):
            yield self.get_batch(vrt, df.iloc[i:i + batch_size]['geometry'])
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer
        # abandons the generator early.
        vrt.close()
def test_vrt_mem_src_kept_alive(path_rgb_byte_tif):
    """VRT in-memory source dataset is kept alive, preventing crashes.

    The VRT is built on a dataset opened from a ``MemoryFile`` and is read
    only after both the dataset and the memory file contexts have exited.
    """
    with open(path_rgb_byte_tif, "rb") as fp:
        bands = fp.read()

    with MemoryFile(bands) as memfile, memfile.open() as dst:
        vrt = WarpedVRT(dst, crs="EPSG:3857")

    # Read after the source contexts have exited. Close the VRT even when
    # the assertion fails so a failing run does not leak the handle.
    try:
        assert (vrt.read() != 0).any()
    finally:
        vrt.close()
class GCPCOGReader(COGReader):
    """Custom COG Reader with GCPS support.

    Attributes:
        src_dataset (DatasetReader): rasterio opened dataset.
        dataset (WarpedVRT): rasterio WarpedVRT dataset built from the
            ground control points of ``src_dataset``.

    """

    def __attrs_post_init__(self):
        """Define _kwargs, open dataset and get info."""
        # Forward only the global read options that were explicitly set.
        for option in (
            "nodata",
            "unscale",
            "resampling_method",
            "vrt_options",
            "post_process",
        ):
            value = getattr(self, option)
            if value is not None:
                self._kwargs[option] = value

        self.src_dataset = rasterio.open(self.filepath)
        try:
            # ``gcps`` is a (control points, crs) pair.
            gcps, gcps_crs = self.src_dataset.gcps
            self.dataset = WarpedVRT(
                self.src_dataset,
                src_crs=gcps_crs,
                src_transform=transform.from_gcps(gcps),
            )
        except Exception:
            # Do not leak the handle opened above when the GCPs cannot be
            # read or the VRT cannot be built.
            self.src_dataset.close()
            raise

        self.nodata = self.nodata if self.nodata is not None else self.dataset.nodata

        self.bounds = transform_bounds(
            self.dataset.crs, constants.WGS84_CRS, *self.dataset.bounds, densify_pts=21
        )

        if self.minzoom is None or self.maxzoom is None:
            self._set_zooms()

        if self.colormap is None:
            self._get_colormap()

        # Return value kept from the original implementation for any code
        # that calls this hook directly.
        return self

    def close(self):
        """Close rasterio dataset."""
        try:
            self.dataset.close()
        finally:
            # Release the source even if closing the VRT raises.
            self.src_dataset.close()
def flow_from_dataframe(self, dataframe, width=0, height=0, batch_size=0):
    """Yield batches of samples read from the source at the sample extents.

    This is a generator: argument validation and all dataset work happen
    when iteration starts, not when the method is called.

    Args:
        dataframe (geodataframe): dataframe with spatial extents;
            reprojected to ``self.crs`` when that is set
        width (int): sample array width; ``self.width`` is used when 0
        height (int): sample array height; ``self.height`` is used when 0
        batch_size (int): number of geometries per batch;
            ``self.batch_size`` is used when 0

    Yields:
        The value returned by ``self.get_batch`` for each consecutive
        slice of ``batch_size`` geometries.

    Raises:
        ValueError: if no positive sample size or batch size is available.
    """
    import math

    width = width if width else self.width
    height = height if height else self.height
    if width < 1 or height < 1:
        raise ValueError('desired sample size must be set')

    batch_size = batch_size if batch_size else self.batch_size
    if batch_size < 1:
        raise ValueError('batch size must be specified')

    df = dataframe.to_crs(self.crs) if self.crs else dataframe

    # Nothing to read; an empty frame would otherwise give a NaN resolution.
    if len(df) == 0:
        return

    # Resolution is the mean sample extent divided by the requested sample
    # size. The bounds table is computed once and subtracted column-wise.
    bounds = df.bounds
    xres = (bounds.maxx - bounds.minx).mean() / width
    yres = (bounds.maxy - bounds.miny).mean() / height

    # Size the virtual raster so it spans the extent of every sample.
    minx, miny, maxx, maxy = df.total_bounds
    # Pixel counts must be integers. Round first so floating-point noise
    # does not add a spurious pixel, then round up so the far edge of the
    # extent is never cut off.
    vrt_width = math.ceil(round((maxx - minx) / xres, 6))
    vrt_height = math.ceil(round((maxy - miny) / yres, 6))
    transform = rasterio.transform.from_origin(minx, maxy, xres, yres)

    # use VRT to ensure correct projection and size
    vrt = WarpedVRT(self.src, crs=df.crs,
                    width=vrt_width, height=vrt_height,
                    transform=transform,
                    resampling=self.resampling)
    try:
        for i in range(0, len(df), batch_size):
            yield self.get_batch(vrt, df.iloc[i:i + batch_size]['geometry'])
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer
        # abandons the generator early.
        vrt.close()
class GCPCOGReader(COGReader):
    """Custom COG Reader with GCPS support.

    Attributes:
        filepath (str): Cloud Optimized GeoTIFF path.
        src_dataset (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT, optional): Rasterio dataset.
        tms (morecantile.TileMatrixSet, optional): TileMatrixSet grid definition. Defaults to `WebMercatorQuad`.
        minzoom (int, optional): Overwrite Min Zoom level.
        maxzoom (int, optional): Overwrite Max Zoom level.
        colormap (dict, optional): Overwrite internal colormap.
        nodata (int or float or str, optional): Global options, overwrite internal nodata value.
        unscale (bool, optional): Global options, apply internal scale and offset on all read operations.
        resampling_method (rasterio.enums.Resampling, optional): Global options, resampling method to use for read operations.
        vrt_options (dict, optional): Global options, WarpedVRT options to use for read operations.
        post_process (callable, optional): Global options, Function to apply after all read operations.
        dataset (rasterio.vrt.WarpedVRT): Warped VRT constructed with dataset GCPS info. **READ ONLY attribute**.

    Examples:
        >>> with GCPCOGReader(src_path) as cog:
            cog.tile(...)
            assert cog.dataset
            assert cog.src_dataset

        >>> with rasterio.open(src_path) as src_dst:
            with GCPCOGReader(None, src_dataset=src_dst) as cog:
                cog.tile(...)

    """

    filepath: str = attr.ib()
    src_dataset: Union[DatasetReader, DatasetWriter, MemoryFile, WarpedVRT] = attr.ib(
        default=None
    )
    tms: TileMatrixSet = attr.ib(default=constants.WEB_MERCATOR_TMS)
    minzoom: int = attr.ib(default=None)
    maxzoom: int = attr.ib(default=None)
    colormap: Dict = attr.ib(default=None)

    # Define global options to be forwarded to functions reading the data (e.g `rio_tiler.reader.read`)
    nodata: Optional[Union[float, int, str]] = attr.ib(default=None)
    unscale: Optional[bool] = attr.ib(default=None)
    resampling_method: Optional[Resampling] = attr.ib(default=None)
    vrt_options: Optional[Dict] = attr.ib(default=None)
    post_process: Optional[
        Callable[[numpy.ndarray, numpy.ndarray], Tuple[numpy.ndarray, numpy.ndarray]]
    ] = attr.ib(default=None)

    # for GCPCOGReader, dataset is not an input option.
    dataset: WarpedVRT = attr.ib(init=False)

    # We use _kwargs to store values of nodata, unscale, vrt_options and resampling_method.
    # _kwargs is used to avoid having to set those values on each method call.
    _kwargs: Dict[str, Any] = attr.ib(init=False, factory=dict)

    def __attrs_post_init__(self):
        """Define _kwargs, open dataset and get info."""
        # Remember whether the source handle was opened here so that a
        # failure below only closes what this reader created.
        opened_here = self.src_dataset is None
        if opened_here:
            self.src_dataset = rasterio.open(self.filepath)

        try:
            # ``gcps`` is a (control points, crs) pair.
            gcps, gcps_crs = self.src_dataset.gcps
            self.dataset = WarpedVRT(
                self.src_dataset,
                src_crs=gcps_crs,
                src_transform=transform.from_gcps(gcps),
            )
        except Exception:
            # Do not leak a handle opened above when the GCPs cannot be
            # read or the VRT cannot be built. A caller-supplied dataset is
            # left untouched.
            if opened_here:
                self.src_dataset.close()
            raise

        super().__attrs_post_init__()

    def close(self):
        """Close rasterio dataset."""
        try:
            self.dataset.close()
        finally:
            # The source is closed only when a filepath was given; with
            # ``filepath=None`` the dataset belongs to the caller.
            if self.filepath:
                self.src_dataset.close()