def test_read_raster_window_input_list(cleantopo_br):
    """Reading a list of paths at once must match mosaicking then resampling."""
    zoom = 5
    config = dict(**cleantopo_br.dict)
    config["output"].update(metatiling=1)
    with mapchete.open(config) as mp:
        mp.batch_process(zoom)
        # collect all written output tiles together with their paths
        tiles_with_paths = []
        for tile in mp.config.output_pyramid.tiles_from_bounds(
                mp.config.bounds, zoom):
            path = mp.config.output.get_path(tile)
            if path_exists(path):
                tiles_with_paths.append((tile, path))
        upper_tile = next(mp.get_process_tiles(zoom - 1))
        assert len(tiles_with_paths) > 1
        # reference: mosaic all tiles, then resample onto the upper tile
        resampled = resample_from_array(
            in_raster=create_mosaic([
                (tile, read_raster_window(path, tile))
                for tile, path in tiles_with_paths
            ]),
            out_tile=upper_tile)
        # candidate: let read_raster_window merge the path list directly
        resampled2 = read_raster_window(
            [path for _, path in tiles_with_paths],
            upper_tile,
            src_nodata=0,
            dst_nodata=0)
        assert resampled.dtype == resampled2.dtype
        assert resampled.shape == resampled2.shape
        assert np.array_equal(resampled.mask, resampled2.mask)
        # TODO slight rounding errors occur
        assert np.allclose(resampled, resampled2, rtol=0.01)
def __init__(self, out_path=None, output=None, out_pyramid=None):
    """
    Initialize VRT writer, loading entries from an existing VRT if present.

    Parameters
    ----------
    out_path : str
        Path to the VRT file (local or ``s3://``).
    output : output object
        Output driver instance.
    out_pyramid : TilePyramid
        Pyramid the output tiles belong to.
    """
    # see if lxml is installed before checking all output tiles
    from lxml.builder import ElementMaker
    self.path = out_path
    self._tp = out_pyramid
    self._output = output
    # for s3:// paths the bucket name is the third "/"-separated token
    self._bucket = self.path.split("/")[2] if self.path.startswith(
        "s3://") else None
    self.bucket_resource = get_boto3_bucket(
        self._bucket) if self._bucket else None
    logger.debug("initialize VRT writer for %s", self.path)
    if path_exists(self.path):
        if self._bucket:
            # object key is everything after "s3://<bucket>/"
            key = "/".join(self.path.split("/")[3:])
            for obj in self.bucket_resource.objects.filter(Prefix=key):
                if obj.key == key:
                    # parse existing VRT body into a {key: entry} mapping
                    self._existing = {
                        k: v
                        for k, v in self._xml_to_entries(
                            obj.get()['Body'].read().decode())
                    }
            # NOTE(review): if no object matches the key exactly,
            # self._existing is never assigned here — presumably
            # path_exists() guarantees a match; TODO confirm
        else:
            with open(self.path) as src:
                self._existing = {
                    k: v for k, v in self._xml_to_entries(src.read())
                }
    else:
        self._existing = {}
    logger.debug("%s existing entries", len(self._existing))
    # counters/buffers for entries added during this session
    self.new_entries = 0
    self._new = {}
def read_raster_no_crs(input_file, indexes=None, gdal_opts=None):
    """
    Wrapper function around rasterio.open().read().

    Parameters
    ----------
    input_file : str
        Path to file
    indexes : int or list
        Band index or list of band indexes to be read.
    gdal_opts : dict
        GDAL options passed on to rasterio.Env()

    Returns
    -------
    MaskedArray

    Raises
    ------
    FileNotFoundError
        if file cannot be found.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            with rasterio.Env(**get_gdal_options(
                    gdal_opts, is_remote=path_is_remote(input_file, s3=True))):
                with rasterio.open(input_file, "r") as src:
                    return src.read(indexes=indexes, masked=True)
        except RasterioIOError as exc:
            # Distinguish "file does not exist" from other I/O errors. If
            # the existence check itself fails (e.g. missing credentials),
            # fall back to re-raising the original rasterio error instead
            # of hiding it. (Replaces a bare ``except:`` which also caught
            # KeyboardInterrupt/SystemExit.)
            try:
                exists = path_exists(input_file)
            except Exception:
                raise exc
            if exists:
                raise exc
            raise FileNotFoundError("%s not found" % input_file)
def driver_from_file(input_file):
    """
    Guess driver from file by opening it.

    Parameters
    ----------
    input_file : str
        Path to the input file.

    Returns
    -------
    driver : string
        driver name

    Raises
    ------
    MapcheteDriverError
        if the file exists but can be opened neither as raster nor vector
    FileNotFoundError
        if the file does not exist
    """
    # lstrip instead of split(".")[1]: the latter raised IndexError for
    # paths without any file extension
    file_ext = os.path.splitext(input_file)[1].lstrip(".")
    if file_ext == "mapchete":
        return "Mapchete"
    # try raster first, then vector
    try:
        with rasterio.open(input_file):
            return "raster_file"
    except Exception as rio_exception:
        try:
            with fiona.open(input_file):
                return "vector_file"
        except Exception as fio_exception:
            if path_exists(input_file):
                logger.exception(f"fiona error: {fio_exception}")
                logger.exception(f"rasterio error: {rio_exception}")
                raise MapcheteDriverError(
                    "%s has an unknown file extension or could not be opened by neither "
                    "rasterio nor fiona." % input_file)
            else:
                raise FileNotFoundError("%s does not exist" % input_file)
def test_output_single_gtiff_cog_s3(output_single_gtiff_cog, mp_s3_tmpdir):
    """End-to-end single COG output on S3: write, read back and validate."""
    zoom, row, col = 5, 3, 7
    config = dict(
        output_single_gtiff_cog.dict,
        output=dict(
            output_single_gtiff_cog.dict["output"],
            path=os.path.join(mp_s3_tmpdir, "cog.tif")
        )
    )
    with mapchete.open(config) as mp:
        process_tile = mp.config.process_pyramid.tile(zoom, row, col)
        # basic functions
        assert mp.config.output.profile()
        assert mp.config.output.empty(process_tile).mask.all()
        assert mp.config.output.get_path(process_tile)
        # tile must not exist before processing
        assert not mp.config.output.tiles_exist(process_tile)
        # run process with two workers
        mp.batch_process(multi=2)
        # now the tile must exist
        assert mp.config.output.tiles_exist(process_tile)
        # read back, this time with data
        data = mp.config.output.read(process_tile)
        assert isinstance(data, np.ndarray)
        assert not data[0].mask.all()
        # write a fully masked (empty) array
        empty = ma.masked_array(
            data=np.ones(process_tile.shape),
            mask=np.ones(process_tile.shape),
        )
        mp.config.output.write(process_tile, empty)
        assert path_exists(mp.config.output.path)
        assert cog_validate(mp.config.output.path, strict=True)
def __init__(self, out_path=None):
    """
    Initialize text file writer, re-adding lines from an existing output.

    Parameters
    ----------
    out_path : str
        Path to the text file (local or ``s3://``).
    """
    self.path = out_path
    # for s3:// paths the bucket name is the third "/"-separated token
    self._bucket = self.path.split("/")[2] if self.path.startswith(
        "s3://") else None
    self.bucket_resource = get_boto3_bucket(
        self._bucket) if self._bucket else None
    logger.debug("initialize TXT writer")
    if path_exists(self.path):
        if self._bucket:
            # object key is everything after "s3://<bucket>/"
            key = "/".join(self.path.split("/")[3:])
            for obj in self.bucket_resource.objects.filter(Prefix=key):
                if obj.key == key:
                    # keep non-empty lines, re-adding the stripped newline
                    self._existing = {
                        l + '\n'
                        for l in obj.get()['Body'].read().decode().split(
                            '\n') if l
                    }
            # NOTE(review): if no object matches the key exactly,
            # self._existing is never assigned here — presumably
            # path_exists() guarantees a match; TODO confirm
        else:
            with open(self.path) as src:
                self._existing = {l for l in src}
    else:
        self._existing = {}
    self.new_entries = 0
    if self._bucket:
        # on S3, content is accumulated in a string buffer
        self.sink = ""
    else:
        self.sink = open(self.path, "w")
    # opening with mode "w" truncated the file, so write existing lines back
    for l in self._existing:
        self._write_line(l)
def open(self, tile, **kwargs):
    """
    Return InputTile object.

    Parameters
    ----------
    tile : ``Tile``

    Returns
    -------
    input tile : ``InputTile``
        tile view of input data
    """
    # candidate (tile, path) pairs for all pyramid tiles within bounds
    candidates = [
        (
            t,
            os.path.join(
                self.path, str(t.zoom), str(t.row), str(t.col)
            ) + "." + self._ext
        )
        for t in self.td_pyramid.tiles_from_bounds(tile.bounds, tile.zoom)
    ]
    # only keep pairs whose file actually exists
    return InputTile(
        tile,
        tiles_paths=[
            (tile_, path_) for tile_, path_ in candidates
            if path_exists(path_)
        ],
        file_type=self._file_type,
        profile=self._profile,
        **kwargs
    )
def prepare(self, process_area=None, **kwargs):
    """
    Compute output raster bounds/shape, build the GTiff profile and open
    the destination dataset (in memory, in a tempfile or directly on disk,
    depending on raster size and target location).
    """
    # snap the process area to the pyramid grid; fall back to the
    # configured effective bounds if no process area is given
    bounds = snap_bounds(
        bounds=Bounds(*process_area.intersection(
            box(*self.output_params["delimiters"]
                ["effective_bounds"])).bounds),
        pyramid=self.pyramid,
        zoom=self.zoom) if process_area else self.output_params[
            "delimiters"]["effective_bounds"]
    # NOTE(review): height is derived from pixel_x_size — presumably the
    # pyramid has square pixels; TODO confirm
    height = math.ceil((bounds.top - bounds.bottom) /
                       self.pyramid.pixel_x_size(self.zoom))
    width = math.ceil((bounds.right - bounds.left) /
                      self.pyramid.pixel_x_size(self.zoom))
    logger.debug("output raster bounds: %s", bounds)
    logger.debug("output raster shape: %s, %s", height, width)
    self._profile = dict(
        GTIFF_DEFAULT_PROFILE,
        driver="GTiff",
        transform=Affine(self.pyramid.pixel_x_size(self.zoom), 0,
                         bounds.left, 0,
                         -self.pyramid.pixel_y_size(self.zoom), bounds.top),
        height=height,
        width=width,
        count=self.output_params["bands"],
        crs=self.pyramid.crs,
        # output_params override the defaults where provided
        **{
            k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k])
            for k in GTIFF_DEFAULT_PROFILE.keys()
        },
        bigtiff=self.output_params.get("bigtiff", "NO"))
    logger.debug("single GTiff profile: %s", self._profile)
    # keep in-memory writing only for reasonably small rasters, unless
    # explicitly disabled beforehand
    self.in_memory = (self.in_memory if self.in_memory is False
                      else height * width < IN_MEMORY_THRESHOLD)
    # set up rasterio
    if path_exists(self.path):
        if self.output_params["mode"] != "overwrite":
            raise MapcheteConfigError(
                "single GTiff file already exists, use overwrite mode to replace"
            )
        else:
            logger.debug("remove existing file: %s", self.path)
            os.remove(self.path)
    # create output directory if necessary
    makedirs(os.path.dirname(self.path))
    logger.debug("open output file: %s", self.path)
    # ExitStack owns all opened resources (memfile/tempfile/dataset)
    self._ctx = ExitStack()
    # (1) use memfile if output is remote or COG
    if self.cog or path_is_remote(self.path):
        if self.in_memory:
            self._memfile = self._ctx.enter_context(MemoryFile())
            self.dst = self._ctx.enter_context(
                self._memfile.open(**self._profile))
        else:
            # in case output raster is too big, use tempfile on disk
            self._tempfile = self._ctx.enter_context(NamedTemporaryFile())
            self.dst = self._ctx.enter_context(
                rasterio.open(self._tempfile.name, "w+", **self._profile))
    else:
        self.dst = self._ctx.enter_context(
            rasterio.open(self.path, "w+", **self._profile))
def _get_tiles_paths(basepath=None, ext=None, pyramid=None, bounds=None,
                     zoom=None):
    """Return (tile, path) pairs for tiles within bounds whose file exists."""
    existing = []
    for t in pyramid.tiles_from_bounds(bounds, zoom):
        tile_path = "%s.%s" % (
            os.path.join(basepath, str(t.zoom), str(t.row), str(t.col)), ext
        )
        if path_exists(tile_path):
            existing.append((t, tile_path))
    return existing
def _rasterio_read(
    input_file=None,
    indexes=None,
    dst_bounds=None,
    dst_shape=None,
    dst_crs=None,
    resampling=None,
    src_nodata=None,
    dst_nodata=None,
):
    """
    Read a window from a raster path or open dataset, warped to a target grid.

    Parameters
    ----------
    input_file : str or rasterio dataset
        Path to the raster file or an already opened dataset.
    indexes : int or list
        Band index or list of band indexes to be read; all bands if None.
    dst_bounds : tuple
        Target bounds (left, bottom, right, top) in ``dst_crs``.
    dst_shape : tuple
        Target array shape; the last two entries are (height, width).
    dst_crs : CRS
        Target coordinate reference system.
    resampling : str
        Name of a ``rasterio.enums.Resampling`` method.
    src_nodata, dst_nodata : float
        Nodata values; fall back to the source nodata if None.

    Returns
    -------
    MaskedArray

    Raises
    ------
    FileNotFoundError
        if a path was given and the file cannot be found.
    """
    def _read(src, indexes, dst_bounds, dst_shape, dst_crs, resampling,
              src_nodata, dst_nodata):
        # warp source onto the requested grid and read the window
        height, width = dst_shape[-2:]
        if indexes is None:
            dst_shape = (len(src.indexes), height, width)
            indexes = list(src.indexes)
        src_nodata = src.nodata if src_nodata is None else src_nodata
        dst_nodata = src.nodata if dst_nodata is None else dst_nodata
        dst_left, dst_bottom, dst_right, dst_top = dst_bounds
        with WarpedVRT(
            src,
            crs=dst_crs,
            src_nodata=src_nodata,
            nodata=dst_nodata,
            width=width,
            height=height,
            transform=affine_from_bounds(dst_left, dst_bottom, dst_right,
                                         dst_top, width, height),
            resampling=Resampling[resampling]
        ) as vrt:
            return vrt.read(
                window=vrt.window(*dst_bounds),
                out_shape=dst_shape,
                indexes=indexes,
                masked=True
            )

    if isinstance(input_file, str):
        logger.debug("got file path %s", input_file)
        try:
            with rasterio.open(input_file, "r") as src:
                return _read(src, indexes, dst_bounds, dst_shape, dst_crs,
                             resampling, src_nodata, dst_nodata)
        except RasterioIOError as exc:
            # Distinguish "file does not exist" from other I/O errors. If
            # the existence check itself fails (e.g. missing credentials),
            # re-raise the original rasterio error instead of hiding it.
            # (Replaces a bare ``except:`` which also caught
            # KeyboardInterrupt/SystemExit.)
            try:
                exists = path_exists(input_file)
            except Exception:
                raise exc
            if exists:
                raise exc
            raise FileNotFoundError("%s not found" % input_file)
    else:  # pragma: no cover
        logger.debug("assuming file object %s", input_file)
        warnings.warn(
            "passing on a rasterio dataset object is not recommended, see "
            "https://github.com/mapbox/rasterio/issues/1309")
        return _read(input_file, indexes, dst_bounds, dst_shape, dst_crs,
                     resampling, src_nodata, dst_nodata)
def tiles_exist(self, process_tile=None, output_tile=None):
    """
    Check whether output tiles of a tile (either process or output) exists.

    Parameters
    ----------
    process_tile : ``BufferedTile``
        must be member of process ``TilePyramid``
    output_tile : ``BufferedTile``
        must be member of output ``TilePyramid``

    Returns
    -------
    exists : bool
    """
    if process_tile and output_tile:
        raise ValueError(
            "just one of 'process_tile' and 'output_tile' allowed")
    if process_tile:
        # a process tile exists as soon as any intersecting output tile does
        for intersecting in self.pyramid.intersecting(process_tile):
            if path_exists(self.get_path(intersecting)):
                return True
        return False
    if output_tile:
        return path_exists(self.get_path(output_tile))
def prepare(self, process_area=None, **kwargs):
    """
    Compute output raster bounds/shape, build the GTiff profile and open
    the destination file for writing.
    """
    # snap the process area to the pyramid grid; fall back to the
    # configured effective bounds if no process area is given
    bounds = snap_bounds(
        bounds=Bounds(*process_area.intersection(
            box(*self.output_params["delimiters"]
                ["effective_bounds"])).bounds),
        pyramid=self.pyramid,
        zoom=self.zoom) if process_area else self.output_params[
            "delimiters"]["effective_bounds"]
    # NOTE(review): height is derived from pixel_x_size — presumably the
    # pyramid has square pixels; TODO confirm
    height = math.ceil((bounds.top - bounds.bottom) /
                       self.pyramid.pixel_x_size(self.zoom))
    width = math.ceil((bounds.right - bounds.left) /
                      self.pyramid.pixel_x_size(self.zoom))
    logger.debug("output raster bounds: %s", bounds)
    logger.debug("output raster shape: %s, %s", height, width)
    self._profile = dict(
        GTIFF_DEFAULT_PROFILE,
        driver="GTiff",
        transform=Affine(self.pyramid.pixel_x_size(self.zoom), 0,
                         bounds.left, 0,
                         -self.pyramid.pixel_y_size(self.zoom), bounds.top),
        height=height,
        width=width,
        count=self.output_params["bands"],
        crs=self.pyramid.crs,
        # output_params override the defaults where provided
        **{
            k: self.output_params.get(k, GTIFF_DEFAULT_PROFILE[k])
            for k in GTIFF_DEFAULT_PROFILE.keys()
        })
    logger.debug("single GTiff profile: %s", self._profile)
    # hard size cap to avoid accidentally huge outputs
    if height * width > 20000 * 20000:
        raise ValueError("output GeoTIFF too big")
    # set up rasterio
    if path_exists(self.path):
        if self.output_params["mode"] != "overwrite":
            raise MapcheteConfigError(
                "single GTiff file already exists, use overwrite mode to replace"
            )
        else:
            logger.debug("remove existing file: %s", self.path)
            os.remove(self.path)
    logger.debug("open output file: %s", self.path)
    self.rio_file = rasterio.open(self.path, "w+", **self._profile)
def test_remote_path_exists(http_raster):
    """path_exists() must detect existing and missing HTTP resources."""
    assert path_exists(http_raster)
    missing = "http://ungarj.github.io/invalid_file.tif"
    assert not path_exists(missing)
def test_s3_path_exists(s2_band_remote):
    """path_exists() must detect an existing object on S3."""
    assert path_exists(s2_band_remote)