def test_wrap_file(path_rgb_byte_tif): """A VirtualVRT has the expected dataset properties.""" with rasterio.open(path_rgb_byte_tif) as src: vrt = WarpedVRT(src, dst_crs='EPSG:3857') assert vrt.crs == 'EPSG:3857' assert tuple(round(x, 1) for x in vrt.bounds) == ( -8789636.7, 2700460.0, -8524406.4, 2943560.2) assert vrt.name.startswith('WarpedVRT(') assert vrt.name.endswith('tests/data/RGB.byte.tif)') assert vrt.indexes == (1, 2, 3) assert vrt.nodatavals == (0, 0, 0) assert vrt.dtypes == ('uint8', 'uint8', 'uint8') assert vrt.read().shape == (3, 736, 803)
def test_wrap_file(path_rgb_byte_tif): """A VirtualVRT has the expected dataset properties.""" with rasterio.open(path_rgb_byte_tif) as src: vrt = WarpedVRT(src, crs=DST_CRS) assert vrt.crs == CRS.from_string(DST_CRS) assert tuple(round(x, 1) for x in vrt.bounds) == ( -8789636.7, 2700460.0, -8524406.4, 2943560.2 ) assert vrt.name.startswith("WarpedVRT(") assert vrt.name.endswith("tests/data/RGB.byte.tif)") assert vrt.indexes == (1, 2, 3) assert vrt.nodatavals == (0, 0, 0) assert vrt.dtypes == ("uint8", "uint8", "uint8") assert vrt.read().shape == (3, 736, 803)
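The two test variants above exercise the same construction pattern. A minimal sketch of that pattern, assuming only that rasterio is installed; the input path is a placeholder:

import rasterio
from rasterio.vrt import WarpedVRT

with rasterio.open("example.tif") as src:  # placeholder path
    # Wrap the source in a virtual warped view; nothing is warped until read.
    with WarpedVRT(src, crs="EPSG:3857") as vrt:
        print(vrt.crs)      # target CRS of the virtual dataset
        print(vrt.bounds)   # bounds expressed in the target CRS
        data = vrt.read()   # (bands, height, width) array, warped on the fly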
def test_warped_vrt_msk_nodata(path_rgb_msk_byte_tif, caplog): """Specifying dst nodata also works for source with .msk""" with rasterio.open(path_rgb_msk_byte_tif) as src: vrt = WarpedVRT(src, crs=DST_CRS, nodata=0.0) assert vrt.dst_crs == CRS.from_string(DST_CRS) assert vrt.src_nodata is None assert vrt.dst_nodata == 0.0 assert vrt.count == 3 assert vrt.mask_flag_enums == ([MaskFlags.nodata],) * 3 caplog.set_level(logging.DEBUG) with rasterio.Env(CPL_DEBUG=True): masks = vrt.read_masks() assert masks[0, 0, 0] == 0 assert masks[0].mean() > 0 assert "RGB2.byte.tif.msk" in caplog.text
def _getitem(self, key): from rasterio.vrt import WarpedVRT band_key, window, squeeze_axis, np_inds = self._get_indexer(key) if not band_key or any(start == stop for (start, stop) in window): # no need to do IO shape = (len(band_key),) + tuple( stop - start for (start, stop) in window) out = np.zeros(shape, dtype=self.dtype) else: with self.lock: riods = self.manager.acquire(needs_lock=False) if self.vrt_params is not None: riods = WarpedVRT(riods, **self.vrt_params) out = riods.read(band_key, window=window) if squeeze_axis: out = np.squeeze(out, axis=squeeze_axis) return out[np_inds]
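The detail worth noting in _getitem above is that the cached dataset handle is re-wrapped in a fresh WarpedVRT on every indexed read. A stripped-down sketch of that re-wrapping step, with the file manager replaced by a plain rasterio.open (the helper name is illustrative, not xarray's API):

import rasterio
from rasterio.vrt import WarpedVRT

def read_band_window(path, vrt_params, band, window):
    # Stand-in for manager.acquire(): open the raw dataset...
    with rasterio.open(path) as riods:
        # ...and wrap it in a WarpedVRT only if warp parameters were captured.
        if vrt_params is not None:
            riods = WarpedVRT(riods, **vrt_params)
        # `window` is expected to be a rasterio.windows.Window
        return riods.read(band, window=window)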
def test_warped_vrt_msk_add_alpha(path_rgb_msk_byte_tif, caplog): """Add an alpha band to the VRT to access per-dataset mask of a source""" with rasterio.open(path_rgb_msk_byte_tif) as src: vrt = WarpedVRT(src, crs=DST_CRS, add_alpha=True) assert vrt.src_nodata is None assert vrt.dst_nodata is None assert vrt.count == 4 assert vrt.mask_flag_enums == ( [MaskFlags.per_dataset, MaskFlags.alpha], ) * 3 + ( [MaskFlags.all_valid], ) caplog.set_level(logging.DEBUG) with rasterio.Env(CPL_DEBUG=True): masks = vrt.read_masks() assert masks[0, 0, 0] == 0 assert masks[0].mean() > 0 assert "RGB2.byte.tif.msk" in caplog.text
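Condensed, the two mask behaviours these tests capture look roughly like this; the input path is a placeholder for a raster carrying a .msk sidecar:

import rasterio
from rasterio.vrt import WarpedVRT

with rasterio.open("RGB2.byte.tif") as src:  # placeholder; assumes a .msk sidecar
    # nodata masking: band count stays at 3, masks derive from the nodata value
    with WarpedVRT(src, crs="EPSG:3857", nodata=0.0) as vrt:
        nodata_masks = vrt.read_masks()
    # alpha masking: a 4th band is added, exposing the per-dataset .msk mask
    with WarpedVRT(src, crs="EPSG:3857", add_alpha=True) as vrt:
        alpha = vrt.read(4)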
def _tile_read( src_dst, bounds, tilesize, indexes=None, nodata=None, resampling_method="bilinear", tile_edge_padding=2, dst_crs=CRS({"init": "EPSG:3857"}), bounds_crs=None, minimum_tile_cover=None, warp_vrt_option={}, ): """ Read data and mask. Attributes ---------- src_dst : rasterio.io.DatasetReader rasterio.io.DatasetReader object bounds : list Output bounds (left, bottom, right, top) in target crs ("dst_crs"). tilesize : int Output image size indexes : list of ints or a single int, optional, (defaults: None) If `indexes` is a list, the result is a 3D array, but is a 2D array if it is a band index number. nodata: int or float, optional (defaults: None) resampling_method : str, optional (default: "bilinear") Resampling algorithm. tile_edge_padding : int, optional (default: 2) Padding to apply to each edge of the tile when retrieving data to assist in reducing resampling artefacts along edges. dst_crs: CRS or str, optional Target coordinate reference system (default "epsg:3857"). bounds_crs: CRS or str, optional Overwrite bounds coordinate reference system (default None, equal to dst_crs). minimum_tile_cover: float, optional (default: None) Minimum % overlap for which to raise an error with dataset not covering enough of the tile. warp_vrt_option: dict, optional (default: {}) These will be passed to the rasterio.warp.WarpedVRT class. Returns ------- data : numpy ndarray mask: numpy array """ if isinstance(indexes, int): indexes = [indexes] elif isinstance(indexes, tuple): indexes = list(indexes) if not bounds_crs: bounds_crs = dst_crs bounds = transform_bounds(bounds_crs, dst_crs, *bounds, densify_pts=21) vrt_params = dict(add_alpha=True, crs=dst_crs, resampling=Resampling[resampling_method]) vrt_transform, vrt_width, vrt_height = get_vrt_transform(src_dst, bounds, dst_crs=dst_crs) out_window = windows.Window(col_off=0, row_off=0, width=vrt_width, height=vrt_height) src_bounds = transform_bounds(src_dst.crs, dst_crs, *src_dst.bounds, densify_pts=21) x_overlap = max( 0, min(src_bounds[2], bounds[2]) - max(src_bounds[0], bounds[0])) y_overlap = max( 0, min(src_bounds[3], bounds[3]) - max(src_bounds[1], bounds[1])) cover_ratio = (x_overlap * y_overlap) / ((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) if minimum_tile_cover and cover_ratio < minimum_tile_cover: raise TileOutsideBounds( "Dataset covers less than {:.0f}% of tile".format(cover_ratio * 100)) if tile_edge_padding > 0 and not _requested_tile_aligned_with_internal_tile( src_dst, bounds, tilesize): vrt_transform = vrt_transform * Affine.translation( -tile_edge_padding, -tile_edge_padding) orig_vrt_height = vrt_height orig_vrt_width = vrt_width vrt_height = vrt_height + 2 * tile_edge_padding vrt_width = vrt_width + 2 * tile_edge_padding out_window = windows.Window( col_off=tile_edge_padding, row_off=tile_edge_padding, width=orig_vrt_width, height=orig_vrt_height, ) vrt_params.update( dict(transform=vrt_transform, width=vrt_width, height=vrt_height)) indexes = indexes if indexes is not None else src_dst.indexes out_shape = (len(indexes), tilesize, tilesize) nodata = nodata if nodata is not None else src_dst.nodata if nodata is not None: vrt_params.update( dict(nodata=nodata, add_alpha=False, src_nodata=nodata)) if has_alpha_band(src_dst): vrt_params.update(dict(add_alpha=False)) vrt_params.update(warp_vrt_option) with WarpedVRT(src_dst, **vrt_params) as vrt: data = vrt.read( out_shape=out_shape, indexes=indexes, window=out_window, resampling=Resampling[resampling_method], ) mask = vrt.dataset_mask(out_shape=(tilesize, tilesize), window=out_window) return data, mask
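A call to this helper might look like the following sketch; the input scene is a placeholder and mercantile supplies Web Mercator tile bounds (both assumptions, not part of the helper itself):

import mercantile
import rasterio

with rasterio.open("scene.tif") as src:  # placeholder input
    xy_bounds = mercantile.xy_bounds(mercantile.Tile(x=486, y=332, z=10))
    data, mask = _tile_read(src, xy_bounds, tilesize=256, indexes=[1, 2, 3])
    # data.shape == (3, 256, 256); mask.shape == (256, 256)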
def point( src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT], coordinates: Tuple[float, float], indexes: Optional[Union[Sequence[int], int]] = None, coord_crs: CRS = constants.WGS84_CRS, masked: bool = True, nodata: Optional[Union[float, int, str]] = None, unscale: bool = False, resampling_method: Resampling = "nearest", vrt_options: Optional[Dict] = None, post_process: Optional[Callable[[numpy.ndarray, numpy.ndarray], Tuple[numpy.ndarray, numpy.ndarray]]] = None, ) -> List: """Read a pixel value for a point. Args: src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset. coordinates (tuple): Coordinates in form of (X, Y). indexes (sequence of int or int, optional): Band indexes. coord_crs (rasterio.crs.CRS, optional): Coordinate Reference System of the input coords. Defaults to `epsg:4326`. masked (bool): Mask samples that fall outside the extent of the dataset. Defaults to `True`. nodata (int or float, optional): Overwrite dataset internal nodata value. unscale (bool, optional): Apply 'scales' and 'offsets' on output data value. Defaults to `False`. resampling_method (rasterio.enums.Resampling, optional): Rasterio's resampling algorithm. Defaults to `nearest`. vrt_options (dict, optional): Options to be passed to the rasterio.warp.WarpedVRT class. post_process (callable, optional): Function to apply on output data and mask values. Returns: list: Pixel value per band indexes. """ if isinstance(indexes, int): indexes = (indexes, ) lon, lat = transform_coords(coord_crs, src_dst.crs, [coordinates[0]], [coordinates[1]]) if not ((src_dst.bounds[0] < lon[0] < src_dst.bounds[2]) and (src_dst.bounds[1] < lat[0] < src_dst.bounds[3])): raise PointOutsideBounds("Point is outside dataset bounds") indexes = indexes if indexes is not None else src_dst.indexes vrt_params: Dict[str, Any] = { "add_alpha": True, "resampling": Resampling[resampling_method], } nodata = nodata if nodata is not None else src_dst.nodata if nodata is not None: vrt_params.update({ "nodata": nodata, "add_alpha": False, "src_nodata": nodata }) if has_alpha_band(src_dst): vrt_params.update({"add_alpha": False}) if vrt_options: vrt_params.update(vrt_options) with WarpedVRT(src_dst, **vrt_params) as vrt_dst: values = list( vrt_dst.sample([(lon[0], lat[0])], indexes=indexes, masked=masked))[0] point_values = values.data mask = values.mask * 255 if masked else numpy.zeros(point_values.shape) if unscale: point_values = point_values.astype("float32", casting="unsafe") numpy.multiply(point_values, vrt_dst.scales[0], out=point_values, casting="unsafe") numpy.add(point_values, vrt_dst.offsets[0], out=point_values, casting="unsafe") if post_process: point_values, _ = post_process(point_values, mask) return point_values.tolist()
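Usage of point() reduces to passing an open dataset and an (X, Y) pair in the coordinate CRS; everything else defaults. A sketch with placeholder path and coordinates:

import rasterio

with rasterio.open("scene.tif") as src:  # placeholder input
    values = point(src, (-61.5, 15.4), indexes=(1, 2, 3))
    # -> one value per requested band, e.g. [13, 27, 9]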
def test_deprecated_param(path_rgb_byte_tif): """dst_crs is deprecated""" with rasterio.open(path_rgb_byte_tif) as src: with pytest.warns(RasterioDeprecationWarning): vrt = WarpedVRT(src, dst_crs=DST_CRS) assert vrt.dst_crs == CRS.from_string(DST_CRS)
def point( src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT], coordinates: Tuple[float, float], indexes: Optional[Union[Sequence[int], int]] = None, coord_crs: CRS = constants.WGS84_CRS, nodata: Optional[Union[float, int, str]] = None, unscale: bool = False, masked: bool = True, vrt_options: Optional[Dict] = None, ) -> List: """ Read point value Attributes ---------- src_dst : rasterio.io.DatasetReader rasterio.io.DatasetReader object coordinates : tuple (X, Y) coordinates. indexes : list of ints or a single int, optional Band indexes coord_crs : rasterio.crs.CRS, optional (X, Y) coordinate system. Default is WGS84/EPSG:4326. nodata: int or float, optional unscale, bool, optional If True, apply scale and offset to the data. Default is set to False. masked : bool Whether to mask samples that fall outside the extent of the dataset. Default is set to True. vrt_options: dict, optional These will be passed to the rasterio.warp.WarpedVRT class. Returns ------- point : list List of pixel values per bands indexes. """ if isinstance(indexes, int): indexes = (indexes,) lon, lat = transform_coords( coord_crs, src_dst.crs, [coordinates[0]], [coordinates[1]] ) if not ( (src_dst.bounds[0] < lon[0] < src_dst.bounds[2]) and (src_dst.bounds[1] < lat[0] < src_dst.bounds[3]) ): raise PointOutsideBounds("Point is outside dataset bounds") indexes = indexes if indexes is not None else src_dst.indexes vrt_params: Dict[str, Any] = {"add_alpha": True} nodata = nodata if nodata is not None else src_dst.nodata if nodata is not None: vrt_params.update({"nodata": nodata, "add_alpha": False, "src_nodata": nodata}) if has_alpha_band(src_dst): vrt_params.update({"add_alpha": False}) if vrt_options: vrt_params.update(vrt_options) with WarpedVRT(src_dst, **vrt_params) as vrt_dst: point_values = list( vrt_dst.sample([(lon[0], lat[0])], indexes=indexes, masked=masked) )[0] if unscale: point_values = point_values.astype("float32", casting="unsafe") numpy.multiply( point_values, vrt_dst.scales[0], out=point_values, casting="unsafe" ) numpy.add( point_values, vrt_dst.offsets[0], out=point_values, casting="unsafe" ) return point_values.tolist()
def test_warped_vrt_source(path_rgb_byte_tif): """A VirtualVRT has the expected source dataset.""" with rasterio.open(path_rgb_byte_tif) as src: vrt = WarpedVRT(src, crs=DST_CRS) assert vrt.src_dataset == src
def test_no_add_alpha_read(path_rgb_msk_byte_tif): """An alpha band is not added if add_alpha=False""" with rasterio.open(path_rgb_msk_byte_tif) as src, WarpedVRT( src, add_alpha=False) as vrt: assert vrt.count == 3
def warp(filename, resampling='nearest', bounds=None, crs=None, res=None, nodata=0, warp_mem_limit=512, num_threads=1, tap=False, tac=None): """ Warps an image to a VRT object Args: filename (str): The input file name. resampling (Optional[str]): The resampling method. Choices are ['average', 'bilinear', 'cubic', 'cubic_spline', 'gauss', 'lanczos', 'max', 'med', 'min', 'mode', 'nearest']. bounds (Optional[tuple]): The extent bounds to warp to. crs (Optional[``CRS`` | int | dict | str]): The CRS to warp to. res (Optional[tuple]): The cell resolution to warp to. nodata (Optional[int or float]): The 'no data' value. warp_mem_limit (Optional[int]): The memory limit (in MB) for the ``rasterio.vrt.WarpedVRT`` function. num_threads (Optional[int]): The number of warp worker threads. tap (Optional[bool]): Whether to target align pixels. tac (Optional[tuple]): Target aligned raster coordinates (x, y). Returns: ``rasterio.vrt.WarpedVRT`` """ if crs: dst_crs = check_crs(crs) else: dst_crs = check_file_crs(filename) src_crs = check_file_crs(filename) with rio.open(filename) as src: src_info = get_file_info(src) if res: dst_res = check_res(res) else: dst_res = src_info.src_res # Check if the data need to be subset if (bounds is None) or (tuple(bounds) == tuple(src_info.src_bounds)): if crs: left_coord, bottom_coord, right_coord, top_coord = transform_bounds(src_crs, dst_crs, src_info.src_bounds.left, src_info.src_bounds.bottom, src_info.src_bounds.right, src_info.src_bounds.top, densify_pts=21) dst_bounds = BoundingBox(left=left_coord, bottom=bottom_coord, right=right_coord, top=top_coord) else: dst_bounds = src_info.src_bounds else: # Ensure that the user bounds object is a ``BoundingBox`` if isinstance(bounds, BoundingBox): dst_bounds = bounds elif isinstance(bounds, str): if bounds.startswith('BoundingBox'): left_coord, bottom_coord, right_coord, top_coord = unpack_bounding_box(bounds) else: logger.exception(' The bounds were not accepted.') raise TypeError dst_bounds = BoundingBox(left=left_coord, bottom=bottom_coord, right=right_coord, top=top_coord) elif isinstance(bounds, tuple) or isinstance(bounds, list) or isinstance(bounds, np.ndarray): dst_bounds = BoundingBox(left=bounds[0], bottom=bounds[1], right=bounds[2], top=bounds[3]) else: logger.exception(f' The bounds type was not understood. Bounds should be given as a rasterio.coords.BoundingBox, tuple, or ndarray, not a {type(bounds)}.') raise TypeError dst_width = int((dst_bounds.right - dst_bounds.left) / dst_res[0]) dst_height = int((dst_bounds.top - dst_bounds.bottom) / dst_res[1]) # Do not warp if all the key metadata match the reference information (compare against dst_bounds, which is always set, rather than the possibly-None user bounds) if (tuple(src_info.src_bounds) == tuple(dst_bounds)) and \ (src_info.src_res == dst_res) and \ (src_crs == dst_crs) and \ (src_info.src_width == dst_width) and \ (src_info.src_height == dst_height) and \ ('.nc' not in filename.lower()): output = filename else: src_transform = Affine(src_info.src_res[0], 0.0, src_info.src_bounds.left, 0.0, -src_info.src_res[1], src_info.src_bounds.top) dst_transform = Affine(dst_res[0], 0.0, dst_bounds.left, 0.0, -dst_res[1], dst_bounds.top) if tac: # Align the cells to target coordinates tap_left = tac[0][np.abs(tac[0] - dst_bounds.left).argmin()] tap_top = tac[1][np.abs(tac[1] - dst_bounds.top).argmin()] dst_transform = Affine(dst_res[0], 0.0, tap_left, 0.0, -dst_res[1], tap_top) if tap: # Align the cells to the resolution dst_transform, dst_width, dst_height = aligned_target(dst_transform, dst_width, dst_height, dst_res) vrt_options = {'resampling': getattr(Resampling, resampling), 'src_crs': src_crs, 'crs': dst_crs, 'src_transform': src_transform, 'transform': dst_transform, 'height': dst_height, 'width': dst_width, 'nodata': nodata, 'warp_mem_limit': warp_mem_limit, 'warp_extras': {'multi': True, 'warp_option': f'NUM_THREADS={num_threads}'}} with WarpedVRT(src, **vrt_options) as vrt: output = vrt return output
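A call to this warp() helper might look as follows; the file name, CRS, and resolution are placeholders:

vrt_or_path = warp(
    "input.tif",          # placeholder file name
    resampling="bilinear",
    crs="EPSG:3857",      # placeholder target CRS
    res=(30.0, 30.0),
    num_threads=4,
)
# Returns the input filename unchanged when no warp is required,
# otherwise a rasterio.vrt.WarpedVRT describing the warped view.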
if not out_dir.exists(): os.makedirs(out_dir) ### Get input area mask print("Extracting SA input area mask...") mask, transform, window = get_input_area_mask("sa") print("Reading and warping SA Blueprint...") outfilename = out_dir / "sa_blueprint.tif" with rasterio.open(src_dir / "blueprint2021.tif") as src: nodata = int(src.nodata) vrt = WarpedVRT( src, width=window.width, height=window.height, nodata=nodata, transform=transform, crs=DATA_CRS, resampling=Resampling.nearest, ) data = vrt.read()[0] # apply mask data = np.where(mask == 1, data, nodata).astype("uint8") write_raster(outfilename, data, transform=transform, crs=DATA_CRS, nodata=nodata)
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, lock=None): """Open a file with rasterio (experimental). This should work with any file that rasterio can open (most often: geoTIFF). The x and y coordinates are generated automatically from the file's geoinformation, shifted to the center of each pixel (see `"PixelIsArea" Raster Space <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_ for more information). You can generate 2D coordinates from the file's attributes with:: from affine import Affine da = xr.open_rasterio('path_to_file.tif') transform = Affine.from_gdal(*da.attrs['transform']) nx, ny = da.sizes['x'], da.sizes['y'] x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform Parameters ---------- filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT Path to the file to open. Or already open rasterio dataset. parse_coordinates : bool, optional Whether to parse the x and y coordinates out of the file's ``transform`` attribute or not. The default is to automatically parse the coordinates only if they are rectilinear (1D). It can be useful to set ``parse_coordinates=False`` if your files are very large or if you don't need the coordinates. chunks : int, tuple or dict, optional Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new DataArray into a dask array. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. lock : False, True or threading.Lock, optional If chunks is provided, this argument is passed on to :py:func:`dask.array.from_array`. By default, a global lock is used to avoid issues with concurrent access to the same file when using dask's multithreaded backend. Returns ------- data : DataArray The newly created DataArray. 
""" import rasterio from rasterio.vrt import WarpedVRT vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, dst_nodata=vrt.dst_nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, height=vrt.height, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r") riods = manager.acquire() if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None coords = OrderedDict() # Get bands if riods.count < 1: raise ValueError("Unknown dims") coords["band"] = np.asarray(riods.indexes) # Get coordinates if riods.transform.is_rectilinear: # 1d coordinates parse = True if parse_coordinates is None else parse_coordinates if parse: nx, ny = riods.width, riods.height # xarray coordinates are pixel centered x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform coords["y"] = y coords["x"] = x else: # 2d coordinates parse = False if (parse_coordinates is None) else parse_coordinates if parse: warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) # Attributes attrs = dict() # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) attrs["transform"] = tuple(riods.transform)[:6] if hasattr(riods, "crs") and riods.crs: # CRS is a dict-like object specific to rasterio # If CRS is not None, we convert it back to a PROJ4 string using # rasterio itself try: attrs["crs"] = riods.crs.to_proj4() except AttributeError: attrs["crs"] = riods.crs.to_string() if hasattr(riods, "res"): # (width, height) tuple of pixels in units of CRS attrs["res"] = riods.res if hasattr(riods, "is_tiled"): # Is the TIF tiled? 
(bool) # We cast it to an int for netCDF compatibility attrs["is_tiled"] = np.uint8(riods.is_tiled) if hasattr(riods, "nodatavals"): # The nodata values for the raster bands attrs["nodatavals"] = tuple(np.nan if nodataval is None else nodataval for nodataval in riods.nodatavals) if hasattr(riods, "scales"): # The scale values for the raster bands attrs["scales"] = riods.scales if hasattr(riods, "offsets"): # The offset values for the raster bands attrs["offsets"] = riods.offsets if hasattr(riods, "descriptions") and any(riods.descriptions): # Descriptions for each dataset band attrs["descriptions"] = riods.descriptions if hasattr(riods, "units") and any(riods.units): # A list of units string for each dataset band attrs["units"] = riods.units # Parse extra metadata from tags, if supported parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m} driver = riods.driver if driver in parsers: if driver == "GTiff": meta = parsers[driver](riods.tags()) else: meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, # as attributes otherwise if isinstance(v, (list, np.ndarray)) and len(v) == riods.count: coords[k] = ("band", np.asarray(v)) else: attrs[k] = v data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) result = DataArray(data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs) if chunks is not None: from dask.base import tokenize # augment the token with the file modification time try: mtime = os.path.getmtime(filename) except OSError: # the filename is probably an s3 bucket rather than a regular file mtime = None token = tokenize(filename, mtime, chunks) name_prefix = "open_rasterio-%s" % token result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable result._file_obj = manager return result
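This is the hook that lets xarray read through a warped view: pass an open WarpedVRT instead of a path and the function captures its parameters for later re-wrapping. A sketch of that usage, with placeholder paths:

import rasterio
from rasterio.vrt import WarpedVRT
import xarray as xr

with rasterio.open("input.tif") as src:          # placeholder path
    with WarpedVRT(src, crs="EPSG:4326") as vrt:
        da = xr.open_rasterio(vrt)               # captures the VRT parameters
        data = da.load()                         # reads go through a rebuilt VRT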
def load_src_vrt(self): """Load a source dataset's VRT into the destination CRS.""" vrt_params = dict(crs=CRS.from_epsg(self.dest_crs), resampling=getattr(Resampling, self.resampling), src_nodata=self.nodata, dst_nodata=self.nodata) return WarpedVRT(self.src, **vrt_params)
def warp(filename, resampling='nearest', bounds=None, crs=None, res=None, nodata=0, warp_mem_limit=512, num_threads=1, tap=False, tac=None): """ Warps an image to a VRT object Args: filename (str): The input file name. resampling (Optional[str]): The resampling method. Choices are ['average', 'bilinear', 'cubic', 'cubic_spline', 'gauss', 'lanczos', 'max', 'med', 'min', 'mode', 'nearest']. bounds (Optional[tuple]): The extent bounds to warp to. crs (Optional[``CRS`` | int | dict | str]): The CRS to warp to. res (Optional[tuple]): The cell resolution to warp to. nodata (Optional[int or float]): The 'no data' value. warp_mem_limit (Optional[int]): The memory limit (in MB) for the ``rasterio.vrt.WarpedVRT`` function. num_threads (Optional[int]): The number of warp worker threads. tap (Optional[bool]): Whether to target align pixels. tac (Optional[tuple]): Target aligned raster coordinates (x, y). Returns: ``rasterio.vrt.WarpedVRT`` """ with rio.open(filename) as src: if res: dst_res = check_res(res) else: dst_res = src.res if crs: dst_crs = check_crs(crs) else: dst_crs = check_src_crs(src) # Check if the data need to be subset if bounds and (bounds != src.bounds): if isinstance(bounds, str): if bounds.startswith('BoundingBox'): left_coord, bottom_coord, right_coord, top_coord = unpack_bounding_box(bounds) else: logger.exception(' The bounds were not accepted.') raise TypeError dst_bounds = BoundingBox(left=left_coord, bottom=bottom_coord, right=right_coord, top=top_coord) else: dst_bounds = BoundingBox(left=bounds[0], bottom=bounds[1], right=bounds[2], top=bounds[3]) else: dst_bounds = src.bounds dst_width = int((dst_bounds.right - dst_bounds.left) / dst_res[0]) dst_height = int((dst_bounds.top - dst_bounds.bottom) / dst_res[1]) # Do not warp if all the key metadata match the reference information if (src.bounds == bounds) and \ (src.res == dst_res) and \ (src.crs == dst_crs) and \ (src.width == dst_width) and \ (src.height == dst_height): output = filename else: dst_transform = Affine(dst_res[0], 0.0, dst_bounds.left, 0.0, -dst_res[1], dst_bounds.top) if tac: # Align the cells to target coordinates tap_left = tac[0][np.abs(tac[0] - dst_bounds.left).argmin()] tap_top = tac[1][np.abs(tac[1] - dst_bounds.top).argmin()] dst_transform = Affine(dst_res[0], 0.0, tap_left, 0.0, -dst_res[1], tap_top) if tap: # Align the cells to the resolution dst_transform, dst_width, dst_height = aligned_target(dst_transform, dst_width, dst_height, dst_res) vrt_options = {'resampling': getattr(Resampling, resampling), 'crs': dst_crs, 'transform': dst_transform, 'height': dst_height, 'width': dst_width, 'nodata': nodata, 'warp_mem_limit': warp_mem_limit, 'warp_extras': {'multi': True, 'warp_option': 'NUM_THREADS={:d}'.format(num_threads)}} with WarpedVRT(src, **vrt_options) as vrt: output = vrt return output
def read_window(src, bounds, target_shape, recipes=None): if recipes is None: recipes = {} source_resolution = get_resolution_in_meters( Bounds(src.bounds, src.crs), (src.height, src.width) ) target_resolution = get_resolution(bounds, target_shape) # GDAL chooses target extents such that reprojected pixels are square; this # may produce pixel offsets near the edges of projected bounds # http://lists.osgeo.org/pipermail/gdal-dev/2016-August/045046.html # # A workaround for this is to produce a VRT with the explicit target extent # in projected coordinates (assuming that the target CRS is known). # Otherwise, we could tweak the origin (.c, .f) of the generated # dst_transform, but that would require knowing projected bounds of all # CRSes in use. if ( "dem" in recipes and bounds.crs == WEB_MERCATOR_CRS and ( target_resolution[0] > source_resolution[0] and target_resolution[1] > source_resolution[1] ) ): # special case for web Mercator to prevent crosshatch artifacts; use a # target image size that most closely matches the source resolution # (and is a power of 2) zoom = min( 22, # going beyond this results in overflow within GDAL get_zoom( max( get_resolution_in_meters( Bounds(src.bounds, src.crs), (src.height, src.width) ) ), op=math.ceil, ), ) dst_width = dst_height = (2 ** zoom) * 256 extent = get_extent(bounds.crs) resolution = ( (extent[2] - extent[0]) / dst_width, (extent[3] - extent[1]) / dst_height ) dst_transform = Affine( resolution[0], 0.0, extent[0], 0.0, -resolution[1], extent[3] ) else: # if raster is overly-large, approximate the transform based on # a scaled-down version and scale it back after attempts = 0 scale_factor = 1 dst_transform = None resolution = None if ( target_resolution[0] < source_resolution[0] or target_resolution[1] < source_resolution[1] ): # provide resolution for improved resampling when overzooming resolution = target_resolution while ( dst_transform is None and src.width // scale_factor > 0 and src.height // scale_factor > 0 ): try: ( dst_transform, dst_width, dst_height ) = warp.calculate_default_transform( src.crs, bounds.crs, src.width // scale_factor, src.height // scale_factor, *src.bounds, resolution=resolution ) scale = Affine.scale(scale_factor, scale_factor) dst_transform *= ~scale dst_width, dst_height = scale * (dst_width, dst_height) except (MemoryError, CPLE_OutOfMemoryError): attempts += 1 scale_factor = 2 * attempts # Some OAM sources have invalid NODATA values (-1000 for a file with a # dtype of Byte). rasterio returns None under these circumstances # (indistinguishable from sources that actually have no NODATA values). # Providing a synthetic value "correctly" masks the output at the expense # of masking valid pixels with that value. This was previously (partially; # in the form of the bounding box but not NODATA pixels) addressed by # creating a VRT that mapped the mask to an alpha channel (something we # can't do w/o adding nDstAlphaBand to rasterio/_warp.pyx). # # Creating external masks and reading them separately (as below) is a # better solution, particularly as it avoids artifacts introduced when the # NODATA values are resampled using something other than nearest neighbor. resampling = Resampling[recipes.get("resample", "bilinear")] nodata = src.nodata or _nodata(src.meta["dtype"]) if "nodata" in recipes: nodata = recipes["nodata"] src_nodata = nodata add_alpha = False if any([MaskFlags.per_dataset in flags for flags in src.mask_flag_enums]): # prefer the mask if available src_nodata = None add_alpha = True with WarpedVRT( src, src_nodata=src_nodata, crs=bounds.crs, width=target_shape[0], height=target_shape[1], transform=transform.from_bounds( *bounds.bounds, width=target_shape[0], height=target_shape[1] ), resampling=resampling, add_alpha=add_alpha, ) as vrt: # NOTE rounding offsets (round_offsets()) eliminates 1px border at # 180º east (but not 85º south) at zoom 2 (with Blue Marble) dst_window = vrt.window(*bounds.bounds) data = vrt.read(out_shape=(vrt.count,) + target_shape, window=dst_window) if vrt.count > src.count: data = np.ma.masked_array(data[0:src.count], mask=[~data[-1]] * src.count) else: # mask with NODATA values if nodata is not None and vrt.nodata is not None: data = _mask(data, vrt.nodata) else: data = np.ma.masked_array(data, mask=np.ma.nomask) return PixelCollection(data, bounds)
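Calling read_window() requires the project's Bounds wrapper, which pairs an extent tuple with its CRS as used above. A hedged sketch with placeholder Web Mercator extents; field names on the returned PixelCollection follow their use in the function:

import rasterio

with rasterio.open("scene.tif") as src:  # placeholder input
    bounds = Bounds((-13638811.8, 4539747.9, -13619243.9, 4559315.8), WEB_MERCATOR_CRS)
    pixels = read_window(src, bounds, (256, 256), recipes={})
    # pixels.data: masked (bands, 256, 256) array; pixels.bounds echoes the input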
def _raster_get_stats( src_dst, indexes=None, nodata=None, overview_level=None, max_size=1024, percentiles=(2, 98), dst_crs=CRS({"init": "EPSG:4326"}), histogram_bins=10, histogram_range=None, resampling_method="bilinear", warp_vrt_option={}, ): """ Retrieve dataset statistics. Attributes ---------- src_dst : rasterio.io.DatasetReader rasterio.io.DatasetReader object indexes : tuple, list, int, optional Dataset band indexes. nodata, int, optional Custom nodata value if not preset in dataset. overview_level : int, optional Overview (decimation) level to fetch. max_size: int, optional Maximum size of dataset to retrieve (will be used to calculate the overview level to fetch). percentiles : tuple, optional Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive (default: (2, 98)). dst_crs: CRS or dict Target coordinate reference system (default: EPSG:4326). histogram_bins: int, optional Defines the number of equal-width histogram bins (default: 10). histogram_range: tuple or list, optional The lower and upper range of the bins. If not provided, range is simply the min and max of the array. resampling_method : str, optional (default: "bilinear") Resampling algorithm. warp_vrt_option: dict, optional (default: {}) These will be passed to the rasterio.warp.WarpedVRT class. Returns ------- out : dict bounds, mercator zoom range, band descriptions and band statistics: (percentiles), min, max, stdev, histogram e.g. { 'bounds': { 'value': (145.72265625, 14.853515625, 145.810546875, 14.94140625), 'crs': '+init=EPSG:4326' }, 'minzoom': 8, 'maxzoom': 12, 'band_descriptions': [(1, 'red'), (2, 'green'), (3, 'blue'), (4, 'nir')], 'statistics': { 1: { 'pc': [38, 147], 'min': 20, 'max': 180, 'std': 28.123562304138662, 'histogram': [ [1625, 219241, 28344, 15808, 12325, 10687, 8535, 7348, 4656, 1208], [20.0, 36.0, 52.0, 68.0, 84.0, 100.0, 116.0, 132.0, 148.0, 164.0, 180.0] ] } ... 3: {...} 4: {...} } } """ if isinstance(indexes, int): indexes = [indexes] elif isinstance(indexes, tuple): indexes = list(indexes) levels = src_dst.overviews(1) width = src_dst.width height = src_dst.height indexes = indexes if indexes else src_dst.indexes nodata = nodata if nodata is not None else src_dst.nodata bounds = transform_bounds(src_dst.crs, dst_crs, *src_dst.bounds, densify_pts=21) minzoom, maxzoom = get_zooms(src_dst) def _get_descr(ix): """Return band description.""" name = src_dst.descriptions[ix - 1] if not name: name = "band{}".format(ix) return name band_descriptions = [(ix, _get_descr(ix)) for ix in indexes] if len(levels): if overview_level: decim = levels[overview_level] else: # determine which zoom level to read for ii, decim in enumerate(levels): if (max(_div_round_up(width, decim), _div_round_up(height, decim)) < max_size): break else: decim = 1 warnings.warn("Dataset has no overviews, reading the full dataset", NoOverviewWarning) out_shape = ( len(indexes), _div_round_up(height, decim), _div_round_up(width, decim), ) vrt_params = dict(add_alpha=True) if has_alpha_band(src_dst): vrt_params.update(dict(add_alpha=False)) if nodata is not None: vrt_params.update( dict(nodata=nodata, add_alpha=False, src_nodata=nodata)) vrt_params.update(warp_vrt_option) with WarpedVRT(src_dst, **vrt_params) as vrt: arr = vrt.read( out_shape=out_shape, indexes=indexes, resampling=Resampling[resampling_method], masked=True, ) params = {} if histogram_bins: params.update(dict(bins=histogram_bins)) if histogram_range: params.update(dict(range=histogram_range)) stats = { indexes[b]: _stats(arr[b], percentiles=percentiles, **params) for b in range(arr.shape[0]) if vrt.colorinterp[b] != ColorInterp.alpha } return { "bounds": { "value": bounds, "crs": dst_crs.to_string() if isinstance(dst_crs, CRS) else dst_crs, }, "minzoom": minzoom, "maxzoom": maxzoom, "band_descriptions": band_descriptions, "statistics": stats, }
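A typical call to this statistics helper, with a placeholder scene path; the returned dict mirrors the docstring example above:

import rasterio

with rasterio.open("scene.tif") as src:  # placeholder input
    stats = _raster_get_stats(src, percentiles=(2, 98), histogram_bins=20)
    print(stats["minzoom"], stats["maxzoom"])
    print(stats["statistics"][1]["pc"])  # e.g. [38, 147]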
def make_tile(level, tile): """ MISSING :param level: :param tile: :return: """ # x,y tile indexes x = tile[0][0] y = tile[0][1] def div_by_16(x): if divmod(x, 16)[1] == 0: return x return div_by_16(x - 1) # put tile in its respective dir out_dir = out_folder.joinpath(str(level)) if not out_dir.exists(): out_dir.mkdir(exist_ok=True) size_x = tile[1].width if tile[1].width > 0 else 1 size_y = tile[1].height if tile[1].height > 0 else 1 # Out file constructor # how many chars to use for representing the tiles. name_length = max(len(str(self.tileinfos[level].countTilesX)), len(str(self.tileinfos[level].countTilesY))) + 1 filename = name_template.format(basename=self.name, x=str(x).zfill(name_length), y=str(y).zfill(name_length)) out_filepath = out_dir.joinpath(filename) ## End profile = default_gtiff_profile profile.update( crs='epsg:4326', driver='GTiff', transform=tile[2], compress='lzw', count=1, width=size_x, height=size_y, blockysize=div_by_16(min(self.blockSize, tile[1].height)), blockxsize=div_by_16(min(self.blockSize, tile[1].width)), ) if level > 1: # except OSError: # # in this level, the amount of pixels that need to be resampled are too many. # # I am choosing to use pixel at the central coordinate of the processing tile # # Sample error: # # ERROR 1: Integer overflow : nSrcXSize=425985, nSrcYSize=163840 # TODO: don't be lazy, clean write try: self.tileinfos[level - 1] except KeyError: _meta = self.get_metadata(level - 1) self.tileinfos[level - 1] = TileInfo( _meta['width'], _meta['height'], self.TileWidth, self.TileHeight) finally: name_length = max( len(str(self.tileinfos[level - 1].countTilesX)), len(str(self.tileinfos[level - 1].countTilesY))) + 1 prev_lvl_tiles = tile_children(zoom=level, src=out_filepath, ndigits=name_length) vrt_handler = buildvrt(prev_lvl_tiles) with rio.open(vrt_handler) as src: profile.update(nodata=src.nodata, dtype=src.meta['dtype']) resolution_factor = pow(2, 1) lvlx_height = src.height / 2 lvlx_width = src.width / 2 lvlx_tranform = Affine(src.transform.a * resolution_factor, src.transform.b, src.transform.c, src.transform.d, src.transform.e * resolution_factor, src.transform.f) vrt = WarpedVRT(src, transform=lvlx_tranform, width=lvlx_width, height=lvlx_height) data = vrt.read(1) else: with self.get_dataset(level) as src: profile.update(nodata=src.nodata, dtype=src.meta['dtype']) data = src.read(1, window=tile[1]) try: with rio.open(out_filepath, 'w', **profile) as dst: window_out = Window(0, 0, size_x, size_y) dst.write(data, window=window_out, indexes=1) except: print(profile) raise Exception
def main(args): config = load_config(args.config) colors = [classe["color"] for classe in config["classes"]] tile_size = args.tile_size try: raster = rasterio_open(args.raster) w, s, e, n = bounds = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds) transform, _, _ = calculate_default_transform(raster.crs, "EPSG:3857", raster.width, raster.height, *bounds) except: sys.exit("Error: Unable to load raster or deal with its projection") tiles = [ mercantile.Tile(x=x, y=y, z=z) for x, y, z in mercantile.tiles(w, s, e, n, args.zoom) ] tiles_nodata = [] for tile in tqdm(tiles, desc="Tiling", unit="tile", ascii=True): w, s, e, n = tile_bounds = mercantile.xy_bounds(tile) # Inspired by Rio-Tiler, cf: https://github.com/mapbox/rio-tiler/pull/45 warp_vrt = WarpedVRT( raster, crs="EPSG:3857", resampling=Resampling.bilinear, add_alpha=False, transform=from_bounds(*tile_bounds, args.size, args.size), width=math.ceil((e - w) / transform.a), height=math.ceil((s - n) / transform.e), ) data = warp_vrt.read(out_shape=(len(raster.indexes), tile_size, tile_size), window=warp_vrt.window(w, s, e, n)) # If no_data is set, remove all tiles with at least one whole border filled only with no_data (on all bands) if args.no_data is not None and ( np.all(data[:, 0, :] == args.no_data) or np.all(data[:, -1, :] == args.no_data) or np.all(data[:, :, 0] == args.no_data) or np.all(data[:, :, -1] == args.no_data)): tiles_nodata.append(tile) continue C, W, H = data.shape os.makedirs(os.path.join(args.out, str(args.zoom), str(tile.x)), exist_ok=True) path = os.path.join(args.out, str(args.zoom), str(tile.x), str(tile.y)) if args.type == "label": assert C == 1, "Error: Label raster input should be 1 band" ext = "png" img = Image.fromarray(np.squeeze(data, axis=0), mode="P") img.putpalette(make_palette(colors[0], colors[1])) img.save("{}.{}".format(path, ext), optimize=True) elif args.type == "image": assert C == 1 or C == 3, "Error: Image raster input should be either 1 or 3 bands" # GeoTiff could be 16 or 32bits if data.dtype == "uint16": data = np.uint8(data / 256) elif data.dtype == "uint32": data = np.uint8(data / (256 * 256)) if C == 1: ext = "png" Image.fromarray(np.squeeze(data, axis=0), mode="L").save("{}.{}".format(path, ext), optimize=True) elif C == 3: ext = "webp" Image.fromarray(np.moveaxis(data, 0, 2), mode="RGB").save("{}.{}".format(path, ext), optimize=True) if args.web_ui: template = "leaflet.html" if not args.web_ui_template else args.web_ui_template tiles = [tile for tile in tiles if tile not in tiles_nodata] base_url = args.web_ui_base_url if args.web_ui_base_url else "./" web_ui(args.out, base_url, tiles, tiles, ext, template)
def open_rasterio( filename, parse_coordinates=None, chunks=None, cache=None, lock=None, **kwargs, ): """Open a file with rasterio (experimental). This should work with any file that rasterio can open (most often: geoTIFF). The x and y coordinates are generated automatically from the file's geoinformation, shifted to the center of each pixel (see `"PixelIsArea" Raster Space <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_ for more information). You can generate 2D coordinates from the file's attributes with:: >>> from affine import Affine >>> da = xr.open_rasterio( ... "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" ... ) >>> da <xarray.DataArray (band: 3, y: 718, x: 791)> [1703814 values with dtype=uint8] Coordinates: * band (band) int64 1 2 3 * y (y) float64 2.827e+06 2.826e+06 2.826e+06 ... 2.612e+06 2.612e+06 * x (x) float64 1.021e+05 1.024e+05 1.027e+05 ... 3.389e+05 3.392e+05 Attributes: transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805... crs: +init=epsg:32618 res: (300.0379266750948, 300.041782729805) is_tiled: 0 nodatavals: (0.0, 0.0, 0.0) scales: (1.0, 1.0, 1.0) offsets: (0.0, 0.0, 0.0) AREA_OR_POINT: Area >>> transform = Affine(*da.attrs["transform"]) >>> transform Affine(300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 2826915.0) >>> nx, ny = da.sizes["x"], da.sizes["y"] >>> x, y = transform * np.meshgrid(np.arange(nx) + 0.5, np.arange(ny) + 0.5) >>> x array([[102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666], [102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666], [102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666], ..., [102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666], [102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666], [102135.01896334, 102435.05689001, 102735.09481669, ..., 338564.90518331, 338864.94310999, 339164.98103666]]) Parameters ---------- filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT Path to the file to open. Or already open rasterio dataset. parse_coordinates : bool, optional Whether to parse the x and y coordinates out of the file's ``transform`` attribute or not. The default is to automatically parse the coordinates only if they are rectilinear (1D). It can be useful to set ``parse_coordinates=False`` if your files are very large or if you don't need the coordinates. chunks : int, tuple or dict, optional Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new DataArray into a dask array. cache : bool, optional If True, cache data loaded from the underlying datastore in memory as NumPy arrays when accessed to avoid reading from the underlying data- store multiple times. Defaults to True unless you specify the `chunks` argument to use dask, in which case it defaults to False. lock : False, True or threading.Lock, optional If chunks is provided, this argument is passed on to :py:func:`dask.array.from_array`. By default, a global lock is used to avoid issues with concurrent access to the same file when using dask's multithreaded backend. Returns ------- data : DataArray The newly created DataArray. 
""" import rasterio from rasterio.vrt import WarpedVRT vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( src_crs=vrt.src_crs.to_string(), crs=vrt.crs.to_string(), resampling=vrt.resampling, tolerance=vrt.tolerance, src_nodata=vrt.src_nodata, nodata=vrt.nodata, width=vrt.width, height=vrt.height, src_transform=vrt.src_transform, transform=vrt.transform, dtype=vrt.working_dtype, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK manager = CachingFileManager( rasterio.open, filename, lock=lock, mode="r", kwargs=kwargs, ) riods = manager.acquire() if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None coords = {} # Get bands if riods.count < 1: raise ValueError("Unknown dims") coords["band"] = np.asarray(riods.indexes) # Get coordinates if riods.transform.is_rectilinear: # 1d coordinates parse = True if parse_coordinates is None else parse_coordinates if parse: nx, ny = riods.width, riods.height # xarray coordinates are pixel centered x, _ = riods.transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) _, y = riods.transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) coords["y"] = y coords["x"] = x else: # 2d coordinates parse = False if (parse_coordinates is None) else parse_coordinates if parse: warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) # Attributes attrs = {} # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) attrs["transform"] = tuple(riods.transform)[:6] if hasattr(riods, "crs") and riods.crs: # CRS is a dict-like object specific to rasterio # If CRS is not None, we convert it back to a PROJ4 string using # rasterio itself try: attrs["crs"] = riods.crs.to_proj4() except AttributeError: attrs["crs"] = riods.crs.to_string() if hasattr(riods, "res"): # (width, height) tuple of pixels in units of CRS attrs["res"] = riods.res if hasattr(riods, "is_tiled"): # Is the TIF tiled? 
(bool) # We cast it to an int for netCDF compatibility attrs["is_tiled"] = np.uint8(riods.is_tiled) if hasattr(riods, "nodatavals"): # The nodata values for the raster bands attrs["nodatavals"] = tuple(np.nan if nodataval is None else nodataval for nodataval in riods.nodatavals) if hasattr(riods, "scales"): # The scale values for the raster bands attrs["scales"] = riods.scales if hasattr(riods, "offsets"): # The offset values for the raster bands attrs["offsets"] = riods.offsets if hasattr(riods, "descriptions") and any(riods.descriptions): # Descriptions for each dataset band attrs["descriptions"] = riods.descriptions if hasattr(riods, "units") and any(riods.units): # A list of units string for each dataset band attrs["units"] = riods.units # Parse extra metadata from tags, if supported parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m} driver = riods.driver if driver in parsers: if driver == "GTiff": meta = parsers[driver](riods.tags()) else: meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, # as attributes otherwise if isinstance(v, (list, np.ndarray)) and len(v) == riods.count: coords[k] = ("band", np.asarray(v)) else: attrs[k] = v data = indexing.LazilyIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) result = DataArray(data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs) if chunks is not None: from dask.base import tokenize # augment the token with the file modification time try: mtime = os.path.getmtime(filename) except OSError: # the filename is probably an s3 bucket rather than a regular file mtime = None token = tokenize(filename, mtime, chunks) name_prefix = f"open_rasterio-{token}" result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable result.set_close(manager.close) return result
def read_window(src, bounds, target_shape, source): source_resolution = get_resolution_in_meters(Bounds(src.bounds, src.crs), (src.height, src.width)) target_resolution = get_resolution(bounds, target_shape) # GDAL chooses target extents such that reprojected pixels are square; this # may produce pixel offsets near the edges of projected bounds # http://lists.osgeo.org/pipermail/gdal-dev/2016-August/045046.html # # A workaround for this is to produce a VRT with the explicit target extent # in projected coordinates (assuming that the target CRS is known). # Otherwise, we could tweak the origin (.c, .f) of the generated # dst_transform, but that would require knowing projected bounds of all # CRSes in use. if ("dem" in source.recipes and bounds.crs == WEB_MERCATOR_CRS and (target_resolution[0] > source_resolution[0] and target_resolution[1] > source_resolution[1])): # special case for web Mercator to prevent crosshatch artifacts; use a # target image size that most closely matches the source resolution # (and is a power of 2) zoom = min( 22, # going beyond this results in overflow within GDAL get_zoom( max( get_resolution_in_meters(Bounds(src.bounds, src.crs), (src.height, src.width))), op=math.ceil, ), ) dst_width = dst_height = (2**zoom) * 256 extent = get_extent(bounds.crs) resolution = ((extent[2] - extent[0]) / dst_width, (extent[3] - extent[1]) / dst_height) dst_transform = Affine(resolution[0], 0.0, extent[0], 0.0, -resolution[1], extent[3]) else: resolution = None if (target_resolution[0] < source_resolution[0] or target_resolution[1] < source_resolution[1]): # provide resolution for improved resampling when overzooming resolution = target_resolution (dst_transform, dst_width, dst_height) = warp.calculate_default_transform(src.crs, bounds.crs, src.width, src.height, *src.bounds, resolution=resolution) # Some OAM sources have invalid NODATA values (-1000 for a file with a # dtype of Byte). rasterio returns None under these circumstances # (indistinguishable from sources that actually have no NODATA values). # Providing a synthetic value "correctly" masks the output at the expense # of masking valid pixels with that value. This was previously (partially; # in the form of the bounding box but not NODATA pixels) addressed by # creating a VRT that mapped the mask to an alpha channel (something we # can't do w/o adding nDstAlphaBand to rasterio/_warp.pyx). # # Creating external masks and reading them separately (as below) is a # better solution, particularly as it avoids artifacts introduced when the # NODATA values are resampled using something other than nearest neighbor. if any([ColorInterp.palette in src.colorinterp]): resampling = Resampling[source.recipes.get("resample", "mode")] else: resampling = Resampling[source.recipes.get("resample", "bilinear")] src_nodata = source.recipes.get("nodata", source.meta.get("nodata", src.nodata)) add_alpha = True if (any([ MaskFlags.per_dataset in flags for flags in src.mask_flag_enums ]) and not any([MaskFlags.alpha in flags for flags in src.mask_flag_enums])): # prefer the mask if available src_nodata = None if any([MaskFlags.alpha in flags for flags in src.mask_flag_enums]): add_alpha = False w, s, e, n = bounds.bounds vrt_transform = (Affine.translation(w, n) * Affine.scale(dst_transform.a, dst_transform.e) * Affine.identity()) vrt_width = math.floor((e - w) / dst_transform.a) vrt_height = math.floor((s - n) / dst_transform.e) with WarpedVRT( src, src_nodata=src_nodata, crs=bounds.crs, width=vrt_width, height=vrt_height, transform=vrt_transform, resampling=resampling, add_alpha=add_alpha, ) as vrt: dst_window = vrt.window(*bounds.bounds) data = vrt.read(out_shape=(vrt.count, ) + target_shape, window=dst_window) mask = np.ma.nomask if source.mask: with rasterio.Env(OGR_ENABLE_PARTIAL_REPROJECTION=True): geom_mask = transform_geom(WGS84_CRS, bounds.crs, source.mask) mask_transform = from_bounds(*bounds.bounds, height=target_shape[0], width=target_shape[1]) mask = geometry_mask([geom_mask], target_shape, transform=mask_transform, invert=True) if any([ColorInterp.alpha in vrt.colorinterp]): alpha_idx = vrt.colorinterp.index(ColorInterp.alpha) mask = [~data[alpha_idx] | mask] * (vrt.count - 1) bands = [data[i] for i in range(0, vrt.count) if i != alpha_idx] data = np.ma.masked_array(bands, mask=mask) else: # mask with NODATA values if src_nodata is not None and vrt.nodata is not None: data = _mask(data, vrt.nodata) data.mask = data.mask | mask else: data = np.ma.masked_array(data, mask=mask) return PixelCollection(data, bounds)
def _get_raster_tile(cls, path: str, *, reprojection_method: str, resampling_method: str, tile_bounds: Tuple[float, float, float, float] = None, tile_size: Tuple[int, int] = (256, 256), preserve_values: bool = False) -> np.ma.MaskedArray: """Load a raster dataset from a file through rasterio. Heavily inspired by mapbox/rio-tiler """ import rasterio from rasterio import transform, windows, warp from rasterio.vrt import WarpedVRT from affine import Affine dst_bounds: Tuple[float, float, float, float] if preserve_values: reproject_enum = resampling_enum = cls._get_resampling_enum('nearest') else: reproject_enum = cls._get_resampling_enum(reprojection_method) resampling_enum = cls._get_resampling_enum(resampling_method) with contextlib.ExitStack() as es: es.enter_context(rasterio.Env(**cls._RIO_ENV_KEYS)) try: with trace('open_dataset'): src = es.enter_context(rasterio.open(path)) except OSError: raise IOError('error while reading file {}'.format(path)) # compute bounds in target CRS dst_bounds = warp.transform_bounds(src.crs, cls._TARGET_CRS, *src.bounds) if tile_bounds is None: tile_bounds = dst_bounds # prevent loads of very sparse data cover_ratio = ( (dst_bounds[2] - dst_bounds[0]) / (tile_bounds[2] - tile_bounds[0]) * (dst_bounds[3] - dst_bounds[1]) / (tile_bounds[3] - tile_bounds[1]) ) if cover_ratio < 0.01: raise exceptions.TileOutOfBoundsError('dataset covers less than 1% of tile') # compute suggested resolution in target CRS dst_transform, _, _ = warp.calculate_default_transform( src.crs, cls._TARGET_CRS, src.width, src.height, *src.bounds ) dst_res = (abs(dst_transform.a), abs(dst_transform.e)) # make sure VRT resolves the entire tile tile_transform = transform.from_bounds(*tile_bounds, *tile_size) tile_res = (abs(tile_transform.a), abs(tile_transform.e)) if tile_res[0] < dst_res[0] or tile_res[1] < dst_res[1]: dst_res = tile_res resampling_enum = cls._get_resampling_enum('nearest') # pad tile bounds to prevent interpolation artefacts num_pad_pixels = 2 # compute tile VRT shape and transform dst_width = max(1, round((tile_bounds[2] - tile_bounds[0]) / dst_res[0])) dst_height = max(1, round((tile_bounds[3] - tile_bounds[1]) / dst_res[1])) vrt_transform = ( transform.from_bounds(*tile_bounds, width=dst_width, height=dst_height) * Affine.translation(-num_pad_pixels, -num_pad_pixels) ) vrt_height, vrt_width = dst_height + 2 * num_pad_pixels, dst_width + 2 * num_pad_pixels # remove padding in output out_window = windows.Window( col_off=num_pad_pixels, row_off=num_pad_pixels, width=dst_width, height=dst_height ) # construct VRT vrt = es.enter_context( WarpedVRT( src, crs=cls._TARGET_CRS, resampling=reproject_enum, transform=vrt_transform, width=vrt_width, height=vrt_height, add_alpha=not cls._has_alpha_band(src) ) ) # read data with warnings.catch_warnings(), trace('read_from_vrt'): warnings.filterwarnings('ignore', message='invalid value encountered.*') tile_data = vrt.read( 1, resampling=resampling_enum, window=out_window, out_shape=tile_size ) # assemble alpha mask mask_idx = vrt.count mask = vrt.read(mask_idx, window=out_window, out_shape=tile_size) == 0 if src.nodata is not None: mask |= tile_data == src.nodata return np.ma.masked_array(tile_data, mask=mask)
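Invoked through its driver class, the method above might be called like this; the class name, file path, and tile indices are placeholders, with mercantile providing Web Mercator tile bounds (all assumptions for illustration):

import mercantile

xy_bounds = mercantile.xy_bounds(mercantile.Tile(x=10, y=15, z=5))
tile = RasterDriver._get_raster_tile(          # hypothetical driver class
    "scene.tif",                               # placeholder path
    reprojection_method="nearest",
    resampling_method="nearest",
    tile_bounds=tuple(xy_bounds),
    tile_size=(256, 256),
)
# -> np.ma.MaskedArray of shape (256, 256)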
def _map_images( plot_file_format: List[str], result_df: GeoDataFrame, filepaths: List[Union[str, Path]], bands: List[int] = [1, 2, 3], aoi: GeoDataFrame = None, show_images=True, show_features=False, name_column: str = "id", save_html: Path = None, ) -> folium.Map: """ Displays data.json, and if available, one or multiple results geotiffs. Args: plot_file_format: List of accepted image file formats e.g. [".png"] result_df: GeoDataFrame with scene geometries. aoi: GeoDataFrame of aoi. filepaths: Paths to images to plot. Optional, by default picks up the last downloaded results. show_images: Shows images if True (default). show_features: Show features if True. For quicklooks maps is set to False. name_column: Name of the feature property that provides the Feature/Layer name. save_html: The path for saving folium map as html file. With default None, no file is saved. """ if result_df.shape[0] > 100: result_df = result_df.iloc[:100] logger.info( "Only the first 100 results will be displayed to avoid memory " "issues.") centroid = box(*result_df.total_bounds).centroid m = folium_base_map( lat=centroid.y, lon=centroid.x, ) df_bounds = result_df.bounds list_bounds = df_bounds.values.tolist() raster_filepaths = [ path for path in filepaths if Path(path).suffix in plot_file_format ] try: feature_names = result_df[name_column].to_list() except KeyError: feature_names = [""] * len(result_df.index) if aoi is not None: aoi_style = VECTOR_STYLE.copy() aoi_style["color"] = "red" folium.GeoJson( aoi, name="aoi", style_function=lambda x: aoi_style, highlight_function=lambda x: HIGHLIGHT_STYLE, ).add_to(m) if show_features: for idx, row in result_df.iterrows(): # type: ignore try: feature_name = row.loc[name_column] except KeyError: feature_name = "" layer_name = f"Feature {idx + 1} - {feature_name}" f = folium.GeoJson( row["geometry"], name=layer_name, style_function=lambda x: VECTOR_STYLE, highlight_function=lambda x: HIGHLIGHT_STYLE, ) folium.Popup( f"{layer_name}: {row.drop('geometry', axis=0).to_json()}" ).add_to(f) f.add_to(m) if show_images and raster_filepaths: if len(bands) != 3: if len(bands) == 1: bands = bands * 3 # plot as grayband else: raise ValueError( "Parameter bands can only contain one or three bands.") for idx, (raster_fp, feature_name) in enumerate( zip(raster_filepaths, feature_names)): with rasterio.open(raster_fp) as src: if src.meta["crs"] is None: dst_array = src.read(bands) minx, miny, maxx, maxy = list_bounds[idx] else: # Folium requires 4326, streaming blocks are 3857 with WarpedVRT(src, crs="EPSG:4326") as vrt: dst_array = vrt.read(bands) minx, miny, maxx, maxy = vrt.bounds m.add_child( folium.raster_layers.ImageOverlay( np.moveaxis(np.stack(dst_array), 0, 2), bounds=[[miny, minx], [maxy, maxx]], # different order. name=f"Image {idx + 1} - {feature_name}", )) # Collapse layer control with too many features. collapsed = bool(result_df.shape[0] > 4) folium.LayerControl(position="bottomleft", collapsed=collapsed).add_to(m) if save_html: save_html = Path(save_html) if not save_html.exists(): save_html.mkdir(parents=True, exist_ok=True) filepath = save_html / "final_map.html" with filepath.open("w") as f: f.write(m._repr_html_()) return m
def test_warp_extras(path_rgb_byte_tif): """INIT_DEST warp extra is passed through.""" with rasterio.open(path_rgb_byte_tif) as src: with WarpedVRT(src, crs=DST_CRS, init_dest=255) as vrt: rgb = vrt.read() assert (rgb[:, 0, 0] == 255).all()
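# A hedged illustration of what the test above relies on: keyword arguments
# that WarpedVRT does not consume itself (such as init_dest) are forwarded to
# GDAL as warp options and are later visible via vrt.warp_extras. The path is
# a hypothetical placeholder, and the printed value is only indicative.
import rasterio
from rasterio.vrt import WarpedVRT

with rasterio.open("example.tif") as src:
    with WarpedVRT(src, crs="EPSG:3857", init_dest=255) as vrt:
        print(vrt.warp_extras)  # expect something like {'init_dest': '255'}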
def tile_read(source, bounds, tilesize, indexes=[1], nodata=None):
    """Read data and mask.

    Attributes
    ----------
    source : str or rasterio.io.DatasetReader
        input file path or rasterio.io.DatasetReader object
    bounds : list
        Mercator tile bounds (left, bottom, right, top)
    tilesize : int
        Output image size
    indexes : list of ints or a single int, optional, (default: 1)
        If `indexes` is a list, the result is a 3D array, but is
        a 2D array if it is a band index number.
    nodata: int or float, optional (defaults: None)

    Returns
    -------
    data : numpy ndarray
    mask : numpy array

    """
    if isinstance(indexes, int):
        indexes = [indexes]

    vrt_params = dict(
        add_alpha=True, crs='epsg:3857', resampling=Resampling.bilinear)

    if nodata is not None:
        vrt_params.update(dict(nodata=nodata,
                               add_alpha=False,
                               src_nodata=nodata,
                               init_dest_nodata=False))

    out_shape = (len(indexes), tilesize, tilesize)

    if isinstance(source, DatasetReader):
        vrt_transform, vrt_width, vrt_height = get_vrt_transform(source, bounds)
        vrt_params.update(dict(
            transform=vrt_transform,
            width=vrt_width,
            height=vrt_height
        ))

        if has_alpha_band(source):
            vrt_params.update(dict(add_alpha=False))

        with WarpedVRT(source, **vrt_params) as vrt:
            data = vrt.read(out_shape=out_shape,
                            resampling=Resampling.bilinear,
                            indexes=indexes)
            mask = vrt.dataset_mask(out_shape=(tilesize, tilesize))
    else:
        with rasterio.open(source) as src:
            vrt_transform, vrt_width, vrt_height = get_vrt_transform(src, bounds)
            vrt_params.update(dict(
                transform=vrt_transform,
                width=vrt_width,
                height=vrt_height
            ))

            if has_alpha_band(src):
                vrt_params.update(dict(add_alpha=False))

            with WarpedVRT(src, **vrt_params) as vrt:
                data = vrt.read(out_shape=out_shape,
                                resampling=Resampling.bilinear,
                                indexes=indexes)
                mask = vrt.dataset_mask(out_shape=(tilesize, tilesize))

    return data, mask
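# Hypothetical usage of the tile_read helper above, with mercantile supplying
# Web Mercator tile bounds; the tile coordinates and path are placeholders.
import mercantile

tile = mercantile.Tile(x=286, y=391, z=10)
bounds = mercantile.xy_bounds(tile)  # (left, bottom, right, top) in EPSG:3857
data, mask = tile_read("example.tif", bounds, tilesize=256, indexes=[1, 2, 3])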
def test_invalid_add_alpha(): """Adding an alpha band to a VRT that already has one fails""" with rasterio.open('tests/data/RGBA.byte.tif') as src: with pytest.raises(WarpOptionsError): WarpedVRT(src, add_alpha=True)
def cog_translate(  # noqa: C901
    source: str,
    dst_path: str,
    dst_kwargs: Dict,
    indexes: Optional[Sequence[int]] = None,
    nodata: Optional[Union[str, int, float]] = None,
    dtype: Optional[str] = None,
    add_mask: bool = False,
    overview_level: Optional[int] = None,
    overview_resampling: str = "nearest",
    web_optimized: bool = False,
    tms: morecantile.TileMatrixSet = morecantile.tms.get("WebMercatorQuad"),
    zoom_level_strategy: str = "auto",
    aligned_levels: Optional[int] = None,
    resampling: str = "nearest",
    in_memory: Optional[bool] = None,
    config: Optional[Dict] = None,
    allow_intermediate_compression: bool = False,
    forward_band_tags: bool = False,
    quiet: bool = False,
    temporary_compression: str = "DEFLATE",
):
    """
    Create a Cloud Optimized GeoTIFF.

    Parameters
    ----------
    source : str, PathLike object or rasterio.io.DatasetReader
        A dataset path, URL or rasterio.io.DatasetReader object.
        Will be opened in "r" mode.
    dst_path : str or PathLike object
        An output dataset path or PathLike object.
        Will be opened in "w" mode.
    dst_kwargs: dict
        Output dataset creation options.
    indexes : tuple or int, optional
        Raster band indexes to copy.
    nodata: int, optional
        Overwrite nodata masking values for input dataset.
    dtype: str, optional
        Overwrite output data type. Default will be the input data type.
    add_mask: bool, optional
        Force output dataset creation with a mask.
    overview_level : int, optional (default: None)
        COGEO overview (decimation) level. By default, inferred from data size.
    overview_resampling : str, optional (default: "nearest")
        Resampling algorithm for overviews.
    web_optimized: bool, optional (default: False)
        Create web-optimized cogeo.
    tms: morecantile.TileMatrixSet, optional (default: "WebMercatorQuad")
        TileMatrixSet to use for reprojection, resolution and alignment.
    zoom_level_strategy: str, optional (default: auto)
        Strategy to determine zoom level (same as in GDAL 3.2).
        LOWER will select the zoom level immediately below the theoretical
        computed non-integral zoom level, leading to subsampling.
        Conversely, UPPER will select the zoom level immediately above,
        leading to oversampling. Defaults to AUTO which selects the closest
        zoom level.
        ref: https://gdal.org/drivers/raster/cog.html#raster-cog
    aligned_levels: int, optional
        Number of overview levels for which GeoTIFF tile and tiles defined in
        the tiling scheme match. Default is to use the maximum overview levels.
    resampling : str, optional (default: "nearest")
        Resampling algorithm.
    in_memory: bool, optional
        Force processing raster in memory (default: process in memory if small).
    config : dict
        Rasterio Env options.
    allow_intermediate_compression: bool, optional (default: False)
        Allow intermediate file compression to reduce memory/disk footprint.
        Note: This could reduce the speed of the process.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/103
    forward_band_tags: bool, optional
        Forward band tags to output bands.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/19
    quiet: bool, optional (default: False)
        Suppress processing steps output.
    temporary_compression: str, optional
        Compression used for the intermediate file, default is deflate.
""" if isinstance(indexes, int): indexes = (indexes, ) config = config or {} with rasterio.Env(**config): with ExitStack() as ctx: if isinstance(source, (DatasetReader, DatasetWriter, WarpedVRT)): src_dst = source else: src_dst = ctx.enter_context(rasterio.open(source)) meta = src_dst.meta indexes = indexes if indexes else src_dst.indexes nodata = nodata if nodata is not None else src_dst.nodata dtype = dtype if dtype else src_dst.dtypes[0] alpha = utils.has_alpha_band(src_dst) mask = utils.has_mask_band(src_dst) if not add_mask and ( (nodata is not None or alpha) and dst_kwargs.get("compress") in ["JPEG", "jpeg"]): warnings.warn( "Using lossy compression with Nodata or Alpha band " "can results in unwanted artefacts.", LossyCompression, ) tilesize = min(int(dst_kwargs["blockxsize"]), int(dst_kwargs["blockysize"])) if src_dst.width < tilesize or src_dst.height < tilesize: tilesize = 2**int( math.log(min(src_dst.width, src_dst.height), 2)) if tilesize < 64: warnings.warn( "Raster has dimension < 64px. Output COG cannot be tiled" " and overviews cannot be added.", IncompatibleBlockRasterSize, ) dst_kwargs.pop("blockxsize", None) dst_kwargs.pop("blockysize", None) dst_kwargs.pop("tiled") overview_level = 0 else: warnings.warn( "Block Size are bigger than raster sizes. " "Setting blocksize to {}".format(tilesize), IncompatibleBlockRasterSize, ) dst_kwargs["blockxsize"] = tilesize dst_kwargs["blockysize"] = tilesize vrt_params = { "add_alpha": True, "dtype": dtype, "width": src_dst.width, "height": src_dst.height, } if nodata is not None: vrt_params.update( dict(nodata=nodata, add_alpha=False, src_nodata=nodata)) if alpha: vrt_params.update(dict(add_alpha=False)) if web_optimized: params = utils.get_web_optimized_params( src_dst, tilesize=tilesize, warp_resampling=resampling, zoom_level_strategy=zoom_level_strategy, aligned_levels=aligned_levels, tms=tms, ) vrt_params.update(**params) with WarpedVRT(src_dst, **vrt_params) as vrt_dst: meta = vrt_dst.meta meta["count"] = len(indexes) if add_mask: meta.pop("nodata", None) meta.pop("alpha", None) if (dst_kwargs.get("photometric", "").upper() == "YCBCR" and meta["count"] == 1): warnings.warn( "PHOTOMETRIC=YCBCR not supported on a 1-band raster" " and has been set to 'MINISBLACK'") dst_kwargs["photometric"] = "MINISBLACK" meta.update(**dst_kwargs) meta.pop("compress", None) meta.pop("photometric", None) if allow_intermediate_compression: meta["compress"] = temporary_compression if in_memory is None: in_memory = vrt_dst.width * vrt_dst.height < IN_MEMORY_THRESHOLD if in_memory: tmpfile = ctx.enter_context(MemoryFile()) tmp_dst = ctx.enter_context(tmpfile.open(**meta)) else: tmpfile = ctx.enter_context(TemporaryRasterFile(dst_path)) tmp_dst = ctx.enter_context( rasterio.open(tmpfile.name, "w", **meta)) # Transfer color interpolation if len(indexes) == 1 and (vrt_dst.colorinterp[indexes[0] - 1] is not ColorInterp.palette): tmp_dst.colorinterp = [ColorInterp.gray] else: tmp_dst.colorinterp = [ vrt_dst.colorinterp[b - 1] for b in indexes ] if tmp_dst.colorinterp[0] is ColorInterp.palette: try: tmp_dst.write_colormap(1, vrt_dst.colormap(1)) except ValueError: warnings.warn( "Dataset has `Palette` color interpretation" " but is missing colormap information") wind = list(tmp_dst.block_windows(1)) if not quiet: click.echo("Reading input: {}".format(source), err=True) fout = os.devnull if quiet else sys.stderr with click.progressbar( wind, file=fout, show_percent=True) as windows: # type: ignore for _, w in windows: matrix = vrt_dst.read(window=w, 
indexes=indexes) tmp_dst.write(matrix, window=w) if add_mask or mask: # Cast mask to uint8 to fix rasterio 1.1.2 error (ref #115) mask_value = vrt_dst.dataset_mask( window=w).astype("uint8") tmp_dst.write_mask(mask_value, window=w) if overview_level is None: overview_level = get_maximum_overview_level( vrt_dst.width, vrt_dst.height, minsize=tilesize) if not quiet and overview_level: click.echo("Adding overviews...", err=True) overviews = [2**j for j in range(1, overview_level + 1)] tmp_dst.build_overviews(overviews, ResamplingEnums[overview_resampling]) if not quiet: click.echo("Updating dataset tags...", err=True) for i, b in enumerate(indexes): tmp_dst.set_band_description(i + 1, src_dst.descriptions[b - 1]) if forward_band_tags: tmp_dst.update_tags(i + 1, **src_dst.tags(b)) tags = src_dst.tags() tags.update( dict( OVR_RESAMPLING_ALG=ResamplingEnums[overview_resampling] .name.upper())) tmp_dst.update_tags(**tags) tmp_dst._set_all_scales( [vrt_dst.scales[b - 1] for b in indexes]) tmp_dst._set_all_offsets( [vrt_dst.offsets[b - 1] for b in indexes]) if not quiet: click.echo("Writing output to: {}".format(dst_path), err=True) copy(tmp_dst, dst_path, copy_src_overviews=True, **dst_kwargs)
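# A hypothetical invocation of cog_translate above, using rio-cogeo's bundled
# creation profiles; the input and output paths are placeholders.
from rio_cogeo.profiles import cog_profiles

cog_translate(
    "input.tif",
    "output_cog.tif",
    cog_profiles.get("deflate"),  # tiled GeoTIFF creation options
    in_memory=False,
)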
def test_warpedvrt_float32_preserve(data): """WarpedVRT preserves float32 dtype of source""" with rasterio.open("tests/data/float32.tif") as src: with WarpedVRT(src, src_crs="EPSG:4326") as vrt: assert src.dtypes == vrt.dtypes == ("float32", )
class GCPCOGReader(COGReader):
    """Custom COG Reader with GCPs support.

    Attributes:
        filepath (str): Cloud Optimized GeoTIFF path.
        src_dataset (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT, optional): Rasterio dataset.
        tms (morecantile.TileMatrixSet, optional): TileMatrixSet grid definition. Defaults to `WebMercatorQuad`.
        minzoom (int, optional): Overwrite Min Zoom level.
        maxzoom (int, optional): Overwrite Max Zoom level.
        colormap (dict, optional): Overwrite internal colormap.
        nodata (int or float or str, optional): Global options, overwrite internal nodata value.
        unscale (bool, optional): Global options, apply internal scale and offset on all read operations.
        resampling_method (rasterio.enums.Resampling, optional): Global options, resampling method to use for read operations.
        vrt_options (dict, optional): Global options, WarpedVRT options to use for read operations.
        post_process (callable, optional): Global options, Function to apply after all read operations.
        dataset (rasterio.vrt.WarpedVRT): Warped VRT constructed with dataset GCPs info. **READ ONLY attribute**.

    Examples:
        >>> with GCPCOGReader(src_path) as cog:
            cog.tile(...)
            assert cog.dataset
            assert cog.src_dataset

        >>> with rasterio.open(src_path) as src_dst:
                with GCPCOGReader(None, src_dataset=src_dst) as cog:
                    cog.tile(...)

    """

    filepath: str = attr.ib()
    src_dataset: Union[DatasetReader, DatasetWriter, MemoryFile, WarpedVRT] = attr.ib(
        default=None
    )
    tms: TileMatrixSet = attr.ib(default=WEB_MERCATOR_TMS)
    minzoom: int = attr.ib(default=None)
    maxzoom: int = attr.ib(default=None)
    colormap: Dict = attr.ib(default=None)

    # Define global options to be forwarded to functions reading the data (e.g `rio_tiler.reader.read`)
    nodata: Optional[NoData] = attr.ib(default=None)
    unscale: Optional[bool] = attr.ib(default=None)
    resampling_method: Optional[Resampling] = attr.ib(default=None)
    vrt_options: Optional[Dict] = attr.ib(default=None)
    post_process: Optional[
        Callable[[numpy.ndarray, numpy.ndarray], Tuple[numpy.ndarray, numpy.ndarray]]
    ] = attr.ib(default=None)

    # For GCPCOGReader, dataset is not an input option.
    dataset: WarpedVRT = attr.ib(init=False)

    # We use _kwargs to store values of nodata, unscale, vrt_options and resampling_method.
    # _kwargs is used to avoid having to set those values on each method call.
    _kwargs: Dict[str, Any] = attr.ib(init=False, factory=dict)

    def __attrs_post_init__(self):
        """Define _kwargs, open dataset and get info."""
        self.src_dataset = self.src_dataset or rasterio.open(self.filepath)
        self.dataset = WarpedVRT(
            self.src_dataset,
            src_crs=self.src_dataset.gcps[1],
            src_transform=transform.from_gcps(self.src_dataset.gcps[0]),
        )
        super().__attrs_post_init__()

    def close(self):
        """Close rasterio dataset."""
        self.dataset.close()
        if self.filepath:
            self.src_dataset.close()
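# The core of GCPCOGReader.__attrs_post_init__ as a standalone, hedged sketch:
# rasterio exposes ground control points as a (gcps, crs) pair, and a WarpedVRT
# can rectify the dataset from them. "with_gcps.tif" is a hypothetical file.
import rasterio
from rasterio import transform
from rasterio.vrt import WarpedVRT

with rasterio.open("with_gcps.tif") as src:
    gcps, gcp_crs = src.gcps
    with WarpedVRT(src, src_crs=gcp_crs,
                   src_transform=transform.from_gcps(gcps)) as vrt:
        data = vrt.read(1)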
def reproject_vrt(self, crs=None):
    """Return a WarpedVRT of the wrapped dataset reprojected to ``crs``."""
    return WarpedVRT(self.rast_ds, crs=crs)
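# reproject_vrt hands back an open WarpedVRT, so callers should close it.
# The equivalent standalone pattern (hypothetical path), with the context
# manager guaranteeing the VRT is released after use:
import rasterio
from rasterio.vrt import WarpedVRT

with rasterio.open("example.tif") as src:
    with WarpedVRT(src, crs="EPSG:4326") as vrt:
        print(vrt.crs, vrt.bounds)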
def _landsat_stats(
    band,
    address_prefix,
    metadata,
    overview_level=None,
    max_size=1024,
    percentiles=(2, 98),
    dst_crs=CRS({"init": "EPSG:4326"}),
    histogram_bins=10,
    histogram_range=None,
):
    """
    Retrieve landsat dataset statistics.

    Attributes
    ----------
    band : str
        Landsat band number
    address_prefix : str
        A Landsat AWS S3 dataset prefix.
    metadata : dict
        Landsat metadata
    overview_level : int, optional
        Overview (decimation) level to fetch.
    max_size: int, optional
        Maximum size of dataset to retrieve
        (will be used to calculate the overview level to fetch).
    percentiles : tuple, optional
        Percentile or sequence of percentiles to compute,
        which must be between 0 and 100 inclusive (default: (2, 98)).
    dst_crs: CRS or dict
        Target coordinate reference system (default: EPSG:4326).
    histogram_bins: int, optional
        Defines the number of equal-width histogram bins (default: 10).
    histogram_range: tuple or list, optional
        The lower and upper range of the bins. If not provided, range is
        simply the min and max of the array.

    Returns
    -------
    out : dict
        (percentiles), min, max, stdev, histogram for each band,
        e.g.
        {
            "4": {
                'pc': [15, 121],
                'min': 1,
                'max': 162,
                'std': 27.22067722127997,
                'histogram': [
                    [102934, 135489, 20981, 13548, 11406, 8799, 7351, 5622, 2985, 662],
                    [1., 17.1, 33.2, 49.3, 65.4, 81.5, 97.6, 113.7, 129.8, 145.9, 162.]
                ]
            }
        }
    """
    src_path = "{}_B{}.TIF".format(address_prefix, band)
    with rasterio.open(src_path) as src:
        levels = src.overviews(1)
        width = src.width
        height = src.height
        bounds = transform_bounds(src.crs, dst_crs, *src.bounds, densify_pts=21)

        if len(levels):
            # use "is not None" so that overview_level=0 selects the first level
            if overview_level is not None:
                decim = levels[overview_level]
            else:
                # determine which zoom level to read
                for ii, decim in enumerate(levels):
                    if max(width // decim, height // decim) < max_size:
                        break
        else:
            decim = 1
            warnings.warn("Dataset has no overviews, reading the full dataset",
                          NoOverviewWarning)

        out_shape = (height // decim, width // decim)

        if band == "QA":
            nodata = 1
        else:
            nodata = 0

        vrt_params = dict(nodata=nodata,
                          add_alpha=False,
                          src_nodata=nodata,
                          init_dest_nodata=False)
        with WarpedVRT(src, **vrt_params) as vrt:
            arr = vrt.read(out_shape=out_shape, indexes=[1], masked=True)

    if band in ["1", "2", "3", "4", "5", "6", "7", "8", "9"]:  # OLI
        multi_reflect = metadata["RADIOMETRIC_RESCALING"].get(
            "REFLECTANCE_MULT_BAND_{}".format(band))
        add_reflect = metadata["RADIOMETRIC_RESCALING"].get(
            "REFLECTANCE_ADD_BAND_{}".format(band))
        sun_elev = metadata["IMAGE_ATTRIBUTES"]["SUN_ELEVATION"]

        arr = 10000 * reflectance.reflectance(
            arr, multi_reflect, add_reflect, sun_elev, src_nodata=0)
    elif band in ["10", "11"]:  # TIRS
        multi_rad = metadata["RADIOMETRIC_RESCALING"].get(
            "RADIANCE_MULT_BAND_{}".format(band))
        add_rad = metadata["RADIOMETRIC_RESCALING"].get(
            "RADIANCE_ADD_BAND_{}".format(band))
        k1 = metadata["TIRS_THERMAL_CONSTANTS"].get(
            "K1_CONSTANT_BAND_{}".format(band))
        k2 = metadata["TIRS_THERMAL_CONSTANTS"].get(
            "K2_CONSTANT_BAND_{}".format(band))

        arr = brightness_temp.brightness_temp(arr, multi_rad, add_rad, k1, k2)

    params = {}
    if histogram_bins:
        params.update(dict(bins=histogram_bins))
    if histogram_range:
        params.update(dict(range=histogram_range))

    stats = {band: utils._stats(arr, percentiles=percentiles, **params)}

    return {
        "bounds": {
            "value": bounds,
            "crs": dst_crs.to_string() if isinstance(dst_crs, CRS) else dst_crs,
        },
        "statistics": stats,
    }
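# Minimal sketch of the overview selection above: walk the decimation levels
# until the reduced size fits under max_size, then read at that shape so GDAL
# can serve the data from the matching overview. The path is hypothetical.
import rasterio

max_size = 1024
with rasterio.open("example.tif") as src:
    for decim in src.overviews(1) or [1]:
        if max(src.width // decim, src.height // decim) < max_size:
            break
    arr = src.read(1,
                   out_shape=(src.height // decim, src.width // decim),
                   masked=True)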
def worker(path):
    raster = rasterio_open(path)
    w, s, e, n = transform_bounds(raster.crs, "EPSG:4326", *raster.bounds)
    tiles = [
        mercantile.Tile(x=x, y=y, z=z)
        for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)
    ]
    tiled = []

    for tile in tiles:
        if cover and tile not in cover:
            continue

        w, s, e, n = mercantile.xy_bounds(tile)

        # close each per-tile VRT after reading so dataset handles do not leak
        with WarpedVRT(
            raster,
            crs="epsg:3857",
            resampling=Resampling.bilinear,
            add_alpha=False,
            transform=from_bounds(w, s, e, n, width, height),
            width=width,
            height=height,
        ) as warp_vrt:
            data = warp_vrt.read(out_shape=(len(raster.indexes), width, height),
                                 window=warp_vrt.window(w, s, e, n))

        if data.dtype == "uint16":  # GeoTiff could be 16 bits
            data = np.uint8(data / 256)
        elif data.dtype == "uint32":  # or 32 bits
            data = np.uint8(data / (256 * 256))

        image = np.moveaxis(data, 0, 2)  # C,H,W -> H,W,C
        tile_key = (str(tile.x), str(tile.y), str(tile.z))

        if (not args.label and len(tiles_map[tile_key]) == 1 and is_nodata(
                image, args.nodata, args.nodata_threshold, args.keep_borders)):
            progress.update()
            continue

        if len(tiles_map[tile_key]) > 1:
            out = os.path.join(splits_path, str(tiles_map[tile_key].index(path)))
        else:
            out = args.out

        x, y, z = map(int, tile)

        if not args.label:
            tile_image_to_file(out, mercantile.Tile(x=x, y=y, z=z), image)
        if args.label:
            tile_label_to_file(out, mercantile.Tile(x=x, y=y, z=z), palette, image)

        if len(tiles_map[tile_key]) == 1:
            progress.update()
            tiled.append(mercantile.Tile(x=x, y=y, z=z))

    raster.close()
    return tiled
def read( src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT], height: Optional[int] = None, width: Optional[int] = None, indexes: Optional[Union[Sequence[int], int]] = None, window: Optional[windows.Window] = None, force_binary_mask: bool = True, nodata: Optional[Union[float, int, str]] = None, unscale: bool = False, resampling_method: Resampling = "nearest", vrt_options: Optional[Dict] = None, post_process: Optional[Callable[[numpy.ndarray, numpy.ndarray], Tuple[numpy.ndarray, numpy.ndarray]]] = None, ) -> Tuple[numpy.ndarray, numpy.ndarray]: """Low level read function. Args: src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset. height (int, optional): Output height of the array. width (int, optional): Output width of the array. indexes (sequence of int or int, optional): Band indexes. window (rasterio.windows.Window, optional): Window to read. force_binary_mask (bool, optional): Cast returned mask to binary values (0 or 255). Defaults to `True`. nodata (int or float, optional): Overwrite dataset internal nodata value. unscale (bool, optional): Apply 'scales' and 'offsets' on output data value. Defaults to `False`. resampling_method (rasterio.enums.Resampling, optional): Rasterio's resampling algorithm. Defaults to `nearest`. vrt_options (dict, optional): Options to be passed to the rasterio.warp.WarpedVRT class. post_process (callable, optional): Function to apply on output data and mask values. Returns: tuple: Data (numpy.ndarray) and Mask (numpy.ndarray) values. """ if isinstance(indexes, int): indexes = (indexes, ) vrt_params = dict(add_alpha=True, resampling=Resampling[resampling_method]) nodata = nodata if nodata is not None else src_dst.nodata if nodata is not None: vrt_params.update( dict(nodata=nodata, add_alpha=False, src_nodata=nodata)) if has_alpha_band(src_dst): vrt_params.update(dict(add_alpha=False)) if indexes is None: indexes = non_alpha_indexes(src_dst) if indexes != src_dst.indexes: warnings.warn("Alpha band was removed from the output data array", AlphaBandWarning) out_shape = (len(indexes), height, width) if height and width else None mask_out_shape = (height, width) if height and width else None resampling = Resampling[resampling_method] if vrt_options: vrt_params.update(vrt_options) with WarpedVRT(src_dst, **vrt_params) as vrt: data = vrt.read( indexes=indexes, window=window, out_shape=out_shape, resampling=resampling, ) if ColorInterp.alpha in vrt.colorinterp: idx = vrt.colorinterp.index(ColorInterp.alpha) + 1 mask = vrt.read( indexes=idx, window=window, out_shape=mask_out_shape, resampling=resampling, out_dtype="uint8", ) else: mask = vrt.dataset_mask( window=window, out_shape=mask_out_shape, resampling=resampling, ) if force_binary_mask: mask = numpy.where(mask != 0, numpy.uint8(255), numpy.uint8(0)) if unscale: data = data.astype("float32", casting="unsafe") numpy.multiply(data, vrt.scales[0], out=data, casting="unsafe") numpy.add(data, vrt.offsets[0], out=data, casting="unsafe") if post_process: data, mask = post_process(data, mask) return data, mask
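# A hypothetical call into the read() helper above: fetch the first three
# bands resampled to a 256 x 256 array with nodata masking applied; the path
# is a placeholder.
import rasterio

with rasterio.open("example.tif") as src:
    data, mask = read(src, height=256, width=256, indexes=(1, 2, 3), nodata=0)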
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
                  lock=None):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.

    Returns
    -------
    data : DataArray
        The newly created DataArray.
""" import rasterio from rasterio.vrt import WarpedVRT vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict(crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, dst_nodata=vrt.dst_nodata, tolerance=vrt.tolerance, warp_extras=vrt.warp_extras) if lock is None: lock = RASTERIO_LOCK manager = CachingFileManager(rasterio.open, filename, lock=lock, mode='r') riods = manager.acquire() if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None coords = OrderedDict() # Get bands if riods.count < 1: raise ValueError('Unknown dims') coords['band'] = np.asarray(riods.indexes) # Get coordinates if LooseVersion(rasterio.__version__) < '1.0': transform = riods.affine else: transform = riods.transform if transform.is_rectilinear: # 1d coordinates parse = True if parse_coordinates is None else parse_coordinates if parse: nx, ny = riods.width, riods.height # xarray coordinates are pixel centered x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * transform _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * transform coords['y'] = y coords['x'] = x else: # 2d coordinates parse = False if (parse_coordinates is None) else parse_coordinates if parse: warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3) # Attributes attrs = dict() # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) attrs['transform'] = tuple(transform)[:6] if hasattr(riods, 'crs') and riods.crs: # CRS is a dict-like object specific to rasterio # If CRS is not None, we convert it back to a PROJ4 string using # rasterio itself attrs['crs'] = riods.crs.to_string() if hasattr(riods, 'res'): # (width, height) tuple of pixels in units of CRS attrs['res'] = riods.res if hasattr(riods, 'is_tiled'): # Is the TIF tiled? 
(bool) # We cast it to an int for netCDF compatibility attrs['is_tiled'] = np.uint8(riods.is_tiled) if hasattr(riods, 'nodatavals'): # The nodata values for the raster bands attrs['nodatavals'] = tuple( np.nan if nodataval is None else nodataval for nodataval in riods.nodatavals) # Parse extra metadata from tags, if supported parsers = {'ENVI': _parse_envi} driver = riods.driver if driver in parsers: meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, # as attributes otherwise if (isinstance(v, (list, np.ndarray)) and len(v) == riods.count): coords[k] = ('band', np.asarray(v)) else: attrs[k] = v data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) result = DataArray(data=data, dims=('band', 'y', 'x'), coords=coords, attrs=attrs) if chunks is not None: from dask.base import tokenize # augment the token with the file modification time try: mtime = os.path.getmtime(filename) except OSError: # the filename is probably an s3 bucket rather than a regular file mtime = None token = tokenize(filename, mtime, chunks) name_prefix = 'open_rasterio-%s' % token result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable result._file_obj = manager return result
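# The vrt_params round-trip above exists so that an open WarpedVRT can be
# passed straight into open_rasterio; a hedged sketch of that pattern, with a
# placeholder path:
import rasterio
from rasterio.vrt import WarpedVRT

with rasterio.open("example.tif") as src:
    with WarpedVRT(src, crs="EPSG:4326") as vrt:
        # the function above; xarray exposes it as xr.open_rasterio
        da = open_rasterio(vrt)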
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
                  lock=None, masked=False):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the new
        DataArray into a dask array. Chunks can also be set to ``True`` or
        ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked : bool, optional
        If True, read the mask and set masked values to NaN. Defaults to False.

    Returns
    -------
    data : DataArray
        The newly created DataArray.
""" parse_coordinates = True if parse_coordinates is None else parse_coordinates import rasterio from rasterio.vrt import WarpedVRT vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, dst_nodata=vrt.dst_nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, height=vrt.height, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r") riods = manager.acquire() # open the subdatasets if they exist if riods.subdatasets: data_arrays = {} for iii, subdataset in enumerate(riods.subdatasets): rioda = open_rasterio( subdataset, parse_coordinates=iii == 0 and parse_coordinates, chunks=chunks, cache=cache, lock=lock, masked=masked, ) data_arrays[rioda.name] = rioda return Dataset(data_arrays) if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None coords = OrderedDict() # Get bands if riods.count < 1: raise ValueError("Unknown dims") coords["band"] = np.asarray(riods.indexes) # Get coordinates if LooseVersion(rasterio.__version__) < LooseVersion("1.0"): transform = riods.affine else: transform = riods.transform if transform.is_rectilinear and parse_coordinates: # 1d coordinates coords.update( affine_to_coords(riods.transform, riods.width, riods.height)) elif parse_coordinates: # 2d coordinates warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) # Attributes attrs = _parse_tags(riods.tags(1)) encoding = dict() # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) attrs["transform"] = tuple(transform)[:6] if hasattr(riods, "nodata") and riods.nodata is not None: # The nodata values for the raster bands if masked: encoding["_FillValue"] = riods.nodata else: attrs["_FillValue"] = riods.nodata if hasattr(riods, "scales"): # The scale values for the raster bands attrs["scales"] = riods.scales if hasattr(riods, "offsets"): # The offset values for the raster bands attrs["offsets"] = riods.offsets if hasattr(riods, "descriptions") and any(riods.descriptions): # Descriptions for each dataset band attrs["descriptions"] = riods.descriptions if hasattr(riods, "units") and any(riods.units): # A list of units string for each dataset band attrs["units"] = riods.units # Parse extra metadata from tags, if supported parsers = {"ENVI": _parse_envi} driver = riods.driver if driver in parsers: meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, # as attributes otherwise if isinstance(v, (list, np.ndarray)) and len(v) == riods.count: coords[k] = ("band", np.asarray(v)) else: attrs[k] = v data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params, masked=masked)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) da_name = attrs.pop("NETCDF_VARNAME", None) 
result = DataArray(data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs, name=da_name) result.encoding = encoding if hasattr(riods, "crs") and riods.crs: result.rio.write_crs(riods.crs, inplace=True) if chunks is not None: from dask.base import tokenize # augment the token with the file modification time try: mtime = os.path.getmtime(filename) except OSError: # the filename is probably an s3 bucket rather than a regular file mtime = None if chunks in (True, "auto"): from dask.array.core import normalize_chunks import dask if LooseVersion(dask.__version__) < LooseVersion("0.18.0"): msg = ( "Automatic chunking requires dask.__version__ >= 0.18.0 . " "You currently have version %s" % dask.__version__) raise NotImplementedError(msg) block_shape = (1, ) + riods.block_shapes[0] chunks = normalize_chunks( chunks=(1, "auto", "auto"), shape=(riods.count, riods.height, riods.width), dtype=riods.dtypes[0], previous_chunks=tuple((c, ) for c in block_shape), ) token = tokenize(filename, mtime, chunks) name_prefix = "open_rasterio-%s" % token result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable result._file_obj = manager return result
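# Hypothetical usage of the variant above: masked=True moves the nodata value
# into the encoding and yields NaNs, while chunks returns a dask-backed
# DataArray; the path and chunk sizes are placeholders.
da = open_rasterio("example.tif", masked=True, chunks={"x": 512, "y": 512})
print(da.encoding.get("_FillValue"), da.chunks)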