def prepare(self, process_area=None, **kwargs):
    """
    Set up the destination raster and open it for writing.

    Computes the output bounds (optionally clipped to ``process_area``),
    derives the raster shape and GTiff profile, then opens the write target:
    a MemoryFile or tempfile for remote/COG outputs, or the final path
    directly for plain local GTiffs.

    Parameters
    ----------
    process_area : shapely geometry, optional
        If given, output bounds are the snapped intersection of this
        geometry with the configured effective bounds; otherwise the
        effective bounds are used as-is.
    kwargs : dict
        Ignored; accepted for interface compatibility.

    Raises
    ------
    MapcheteConfigError
        If the output file already exists and mode is not "overwrite".
    """
    # clip process_area to the configured effective bounds and snap the
    # result to the pyramid grid at the output zoom level
    bounds = snap_bounds(
        bounds=Bounds(*process_area.intersection(
            box(*self.output_params["delimiters"]["effective_bounds"])
        ).bounds),
        pyramid=self.pyramid,
        zoom=self.zoom
    ) if process_area else self.output_params["delimiters"]["effective_bounds"]
    # NOTE(review): pixel_x_size is used for the vertical extent too —
    # presumably pixels are square at this zoom; confirm against pyramid
    height = math.ceil(
        (bounds.top - bounds.bottom) / self.pyramid.pixel_x_size(self.zoom))
    width = math.ceil(
        (bounds.right - bounds.left) / self.pyramid.pixel_x_size(self.zoom))
    logger.debug("output raster bounds: %s", bounds)
    logger.debug("output raster shape: %s, %s", height, width)
    # rasterio profile: defaults, overridden by geometry/band settings and by
    # any user-supplied values for the keys known to GTIFF_DEFAULT_PROFILE
    self._profile = dict(
        GTIFF_DEFAULT_PROFILE,
        driver="GTiff",
        # north-up affine transform anchored at the top-left corner
        transform=Affine(
            self.pyramid.pixel_x_size(self.zoom), 0, bounds.left,
            0, -self.pyramid.pixel_y_size(self.zoom), bounds.top),
        height=height,
        width=width,
        count=self.output_params["bands"],
        crs=self.pyramid.crs,
        **{
            k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k])
            for k in GTIFF_DEFAULT_PROFILE.keys()
        },
        bigtiff=self.output_params.get("bigtiff", "NO"))
    logger.debug("single GTiff profile: %s", self._profile)
    # an explicit in_memory=False is respected; otherwise decide by raster size
    self.in_memory = (
        self.in_memory if self.in_memory is False
        else height * width < IN_MEMORY_THRESHOLD)
    # set up rasterio
    if path_exists(self.path):
        if self.output_params["mode"] != "overwrite":
            raise MapcheteConfigError(
                "single GTiff file already exists, use overwrite mode to replace"
            )
        else:
            logger.debug("remove existing file: %s", self.path)
            os.remove(self.path)
    # create output directory if necessary
    makedirs(os.path.dirname(self.path))
    logger.debug("open output file: %s", self.path)
    # ExitStack owns every opened resource; released in close()
    self._ctx = ExitStack()
    # (1) use memfile if output is remote or COG
    if self.cog or path_is_remote(self.path):
        if self.in_memory:
            self._memfile = self._ctx.enter_context(MemoryFile())
            self.dst = self._ctx.enter_context(
                self._memfile.open(**self._profile))
        else:
            # in case output raster is too big, use tempfile on disk
            self._tempfile = self._ctx.enter_context(NamedTemporaryFile())
            self.dst = self._ctx.enter_context(
                rasterio.open(self._tempfile.name, "w+", **self._profile))
    else:
        # plain local GTiff: write directly to the destination path
        self.dst = self._ctx.enter_context(
            rasterio.open(self.path, "w+", **self._profile))
def read_raster_no_crs(input_file, indexes=None, gdal_opts=None):
    """
    Wrapper function around rasterio.open().read().

    Parameters
    ----------
    input_file : str
        Path to file
    indexes : int or list
        Band index or list of band indexes to be read.
    gdal_opts : dict
        GDAL options passed on to rasterio.Env()

    Returns
    -------
    MaskedArray

    Raises
    ------
    FileNotFoundError
        if file cannot be found.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            with rasterio.Env(**get_gdal_options(
                    gdal_opts,
                    is_remote=path_is_remote(input_file, s3=True))):
                with rasterio.open(input_file, "r") as src:
                    return src.read(indexes=indexes, masked=True)
        except RasterioIOError as e:
            # Distinguish "file does not exist" from other I/O failures.
            # The existence check itself may fail (e.g. network error); in
            # that case fall back to the original rasterio error instead of
            # masking it. Narrowed from a bare `except:` which also caught
            # KeyboardInterrupt/SystemExit and the function's own re-raise.
            try:
                exists = path_exists(input_file)
            except Exception:
                raise e
            if exists:
                raise e
            raise FileNotFoundError("%s not found" % input_file)
def read_raster_window(input_files, tile, indexes=None, resampling="nearest",
                       src_nodata=None, dst_nodata=None, gdal_opts=None):
    """
    Return NumPy arrays from an input raster.

    NumPy arrays are reprojected and resampled to tile properties from input
    raster.

    If tile boundaries cross the antimeridian, data on the other side of the
    antimeridian will be read and concatenated to the numpy array accordingly.

    Parameters
    ----------
    input_files : string or list
        path to a raster file or list of paths to multiple raster files
        readable by rasterio.
    tile : Tile
        a Tile object
    indexes : list or int
        a list of band numbers; None will read all.
    resampling : string
        one of "nearest", "average", "bilinear" or "lanczos"
    src_nodata : int or float, optional
        if not set, the nodata value from the source dataset will be used
    dst_nodata : int or float, optional
        if not set, the nodata value from the source dataset will be used
    gdal_opts : dict
        GDAL options passed on to rasterio.Env()

    Returns
    -------
    raster : MaskedArray
    """
    try:
        # FIX: the previous `... if isinstance(input_files, str) else False`
        # forced is_remote=False for list inputs, so remote GDAL options were
        # never applied when reading multiple files. Probe the first list
        # element (or the single path) instead.
        with rasterio.Env(**get_gdal_options(
                gdal_opts,
                is_remote=path_is_remote(
                    input_files[0] if isinstance(input_files, list)
                    else input_files,
                    s3=True))) as env:
            logger.debug(
                "reading %s with GDAL options %s", input_files, env.options)
            return _read_raster_window(
                input_files, tile, indexes=indexes, resampling=resampling,
                src_nodata=src_nodata, dst_nodata=dst_nodata)
    except FileNotFoundError:  # pragma: no cover
        raise
    except Exception as e:  # pragma: no cover
        raise MapcheteIOError(e)
def __init__(self, tile, raster_file, **kwargs):
    """Initialize with a tile and its source raster file."""
    self.tile = tile
    self.raster_file = raster_file
    # local files need no special GDAL configuration
    self.gdal_opts = {}
    if io.path_is_remote(raster_file.path):
        # for remote rasters, avoid directory listings and restrict HTTP
        # range requests to the raster's own extension plus overviews
        _, file_ext = os.path.splitext(raster_file.path)
        self.gdal_opts = {
            "GDAL_DISABLE_READDIR_ON_OPEN": True,
            "CPL_VSIL_CURL_ALLOWED_EXTENSIONS": "%s,.ovr" % file_ext
        }
def _path_exists(path):
    """Check if file exists either remote or local."""
    # local paths: plain filesystem check
    if not path_is_remote(path):
        return os.path.exists(path)
    # remote paths: issue a request and treat HTTP 404 as "does not exist";
    # any other HTTP error is propagated
    try:
        urlopen(path).info()
    except HTTPError as e:
        if e.code == 404:
            return False
        raise
    return True
def read_raster_window(
    input_file, tile, indexes=None, resampling="nearest", src_nodata=None,
    dst_nodata=None, gdal_opts=None
):
    """
    Return NumPy arrays from an input raster.

    NumPy arrays are reprojected and resampled to tile properties from input
    raster.

    If tile boundaries cross the antimeridian, data on the other side of the
    antimeridian will be read and concatenated to the numpy array accordingly.

    Parameters
    ----------
    input_file : string
        path to a raster file readable by rasterio.
    tile : Tile
        a Tile object
    indexes : list or int
        a list of band numbers; None will read all.
    resampling : string
        one of "nearest", "average", "bilinear" or "lanczos"
    src_nodata : int or float, optional
        if not set, the nodata value from the source dataset will be used
    dst_nodata : int or float, optional
        if not set, the nodata value from the source dataset will be used
    gdal_opts : dict
        GDAL options passed on to rasterio.Env()

    Returns
    -------
    raster : MaskedArray
    """
    dst_shape = tile.shape
    # for remote files, layer user-supplied GDAL options over the HTTP
    # defaults; for local files use only the user options
    extra_opts = {} if gdal_opts is None else dict(**gdal_opts)
    if path_is_remote(input_file):
        gdal_opts = {**GDAL_HTTP_OPTS, **extra_opts}
    else:
        gdal_opts = extra_opts
    # normalize band indexes: None -> all bands (unknown band count),
    # single-element list -> scalar, multi-element list -> prepend band axis
    if not isinstance(indexes, int):
        if indexes is None:
            dst_shape = (None,) + dst_shape
        elif len(indexes) == 1:
            indexes = indexes[0]
        else:
            dst_shape = (len(indexes),) + dst_shape
    # Tile boundaries potentially exceeding the tile matrix on the
    # antimeridian or the northern/southern boundary require stitching data
    # from both sides.
    if tile.pixelbuffer and _is_on_edge(tile):
        return _get_warped_edge_array(
            tile=tile,
            input_file=input_file,
            indexes=indexes,
            dst_shape=dst_shape,
            resampling=resampling,
            src_nodata=src_nodata,
            dst_nodata=dst_nodata,
            gdal_opts=gdal_opts
        )
    # Otherwise a single warped read suffices.
    return _get_warped_array(
        input_file=input_file,
        indexes=indexes,
        dst_bounds=tile.bounds,
        dst_shape=dst_shape,
        dst_crs=tile.crs,
        resampling=resampling,
        src_nodata=src_nodata,
        dst_nodata=dst_nodata,
        gdal_opts=gdal_opts
    )
def close(self):
    """
    Write all collected tile entries into a GDAL VRT XML file.

    Combines previously existing and newly added entries, builds a
    VRTDataset XML tree (one VRTRasterBand per output band, one
    ComplexSource per tile) and writes the pretty-printed XML either to an
    S3 bucket or to the local filesystem. Does nothing if no new entries
    were added.
    """
    from lxml.builder import ElementMaker
    logger.debug("%s new entries in %s", self.new_entries, self)
    if not self._new:
        logger.debug("no entries to write")
        return
    # combine existing and new entries; new entries win on key collision
    all_entries = {**self._existing, **self._new}
    logger.debug("writing a total of %s entries", len(all_entries))
    # get VRT attributes: the affine transform and pixel shape covering all
    # entry tiles, plus dtype/nodata from the output profile
    vrt_affine, vrt_shape = raster.tiles_to_affine_shape(
        list(all_entries.keys()))
    vrt_dtype = _gdal_typename(self._output.profile()["dtype"])
    vrt_nodata = self._output.output_params["nodata"]
    # build XML
    E = ElementMaker()
    vrt = E.VRTDataset(
        E.SRS(self._tp.crs.wkt),
        E.GeoTransform(", ".join(map(str, vrt_affine.to_gdal()))),
        *[
            E.VRTRasterBand(
                E.NoDataValue(str(vrt_nodata)),
                E.ColorInterp("Gray"),
                *[
                    E.ComplexSource(
                        # remote tiles are referenced by absolute GDAL path,
                        # local tiles relative to the VRT's own directory
                        E.SourceFilename(
                            _tile_path(orig_path=path, for_gdal=True)
                            if path_is_remote(path)
                            else relative_path(
                                path=path,
                                base_dir=os.path.split(self.path)[0]),
                            relativeToVRT="0" if path_is_remote(path)
                            else "1"),
                        E.SourceBand(str(b_idx)),
                        E.SourceProperties(
                            RasterXSize=str(tile.shape.width),
                            RasterYSize=str(tile.shape.height),
                            DataType=vrt_dtype,
                            BlockXSize=str(self._output.profile().get(
                                "blockxsize", self._tp.tile_size)),
                            BlockYSize=str(self._output.profile().get(
                                "blockysize", self._tp.tile_size)),
                        ),
                        # source window: the full tile raster
                        E.SrcRect(
                            xOff="0",
                            yOff="0",
                            xSize=str(tile.shape.width),
                            ySize=str(tile.shape.height),
                        ),
                        # destination window: the tile's pixel offset within
                        # the VRT; NOTE(review): bounds_to_ranges is computed
                        # twice per tile (once for xOff, once for yOff)
                        E.DstRect(
                            xOff=str(
                                list(
                                    raster.bounds_to_ranges(
                                        out_bounds=tile.bounds,
                                        in_affine=vrt_affine,
                                        in_shape=vrt_shape))[2]),
                            yOff=str(
                                list(
                                    raster.bounds_to_ranges(
                                        out_bounds=tile.bounds,
                                        in_affine=vrt_affine,
                                        in_shape=vrt_shape))[0]),
                            xSize=str(tile.shape.width),
                            ySize=str(tile.shape.height),
                        ),
                        E.NODATA(str(vrt_nodata)))
                    # deterministic source order: sorted by tile path
                    for tile, path in sorted(
                        all_entries.items(), key=operator.itemgetter(1))
                ],
                dataType=vrt_dtype,
                band=str(b_idx))
            for b_idx in range(1, self._output.profile()["count"] + 1)
        ],
        rasterXSize=str(vrt_shape.width),
        rasterYSize=str(vrt_shape.height),
    )
    # generate pretty XML and write
    xmlstr = minidom.parseString(ET.tostring(vrt)).toprettyxml(indent=" ")
    if self._bucket:
        # S3 destination: object key is the path without the scheme/bucket
        # prefix (first three "/"-separated segments)
        key = "/".join(self.path.split("/")[3:])
        logger.debug("upload %s", key)
        self.bucket_resource.put_object(Key=key, Body=xmlstr)
    else:
        logger.debug("write to %s", self.path)
        with open(self.path, "w") as dst:
            dst.write(xmlstr)
def _absolute_path(directory, path):
    """Return absolute path if local."""
    # remote paths are already fully qualified and must not be joined
    if path_is_remote(path):
        return path
    return os.path.abspath(os.path.join(directory, path))
def close(self, exc_type=None, exc_value=None, exc_traceback=None):
    """
    Build overviews and write file.

    If no exception occurred in the managed block, optionally builds
    overviews on the open dataset and then materializes the output:
    COGs are copied (locally or via tempfile + S3 upload), plain remote
    GTiffs are uploaded from the memfile/tempfile, and plain local GTiffs
    are already written in place. All rasterio resources held by the
    ExitStack are released in any case.

    Parameters
    ----------
    exc_type, exc_value, exc_traceback :
        standard context-manager exit arguments; any truthy exc_type
        skips the write step.
    """
    try:
        # only in case no Exception was raised
        if not exc_type:
            # build overviews
            if self.overviews and self.dst is not None:
                logger.debug(
                    "build overviews using %s resampling and levels %s",
                    self.overviews_resampling, self.overviews_levels)
                self.dst.build_overviews(
                    self.overviews_levels,
                    Resampling[self.overviews_resampling])
                self.dst.update_tags(
                    ns='rio_overview', resampling=self.overviews_resampling)
            # write
            if self.cog:
                if path_is_remote(self.path):
                    # remote COG: copy to tempfile and upload to destination
                    logger.debug("upload to %s", self.path)
                    # TODO this writes a memoryfile to disk and uploads the file,
                    # this is inefficient but until we find a solution to copy
                    # from one memoryfile to another the rasterio way (rasterio needs
                    # to rearrange the data so the overviews are at the beginning of
                    # the GTiff in order to be a valid COG).
                    with NamedTemporaryFile() as tmp_dst:
                        copy(
                            self.dst, tmp_dst.name,
                            copy_src_overviews=True, **self._profile)
                        # S3 key: path minus the scheme/bucket prefix
                        self._bucket_resource.upload_file(
                            Filename=tmp_dst.name,
                            Key="/".join(self.path.split("/")[3:]),
                        )
                else:
                    # local COG: copy to destination
                    logger.debug("write to %s", self.path)
                    copy(
                        self.dst, self.path,
                        copy_src_overviews=True, **self._profile)
            else:
                if path_is_remote(self.path):
                    # remote GTiff: upload memfile or tempfile to destination
                    logger.debug("upload to %s", self.path)
                    if self.in_memory:
                        self._bucket_resource.put_object(
                            Body=self._memfile,
                            Key="/".join(self.path.split("/")[3:]),
                        )
                    else:
                        self._bucket_resource.upload_file(
                            Filename=self._tempfile.name,
                            Key="/".join(self.path.split("/")[3:]),
                        )
                else:
                    # local GTiff: already written, do nothing
                    pass
    finally:
        # always release memfile/tempfile/dataset resources
        self._ctx.close()