def test_tiles_exist_local(example_mapchete):
    """tiles_exist() must agree with the tiles actually written to disk."""
    bounds = (2.0, 0.0, 4.0, 2.0)
    zoom = 10
    with mapchete.open(
        dict(
            example_mapchete.dict,
            pyramid=dict(example_mapchete.dict["pyramid"], metatiling=4),
            output=dict(example_mapchete.dict["output"], metatiling=1),
        ),
        bounds=bounds,
    ) as mp:
        # generate tile directory
        mp.batch_process(zoom=zoom)
        process_tiles = list(
            mp.config.process_pyramid.tiles_from_bounds(bounds, zoom)
        )
        output_tiles = list(
            mp.config.output_pyramid.tiles_from_bounds(bounds, zoom)
        )

        # see which files were written and create set for output_tiles and
        # process_tiles
        out_path = os.path.join(
            SCRIPTDIR, example_mapchete.dict["output"]["path"], str(zoom)
        )
        written_output_tiles = set()
        for root, _, files in os.walk(out_path):
            for file in files:
                # NOTE: use separate names here so the outer "zoom" variable
                # is not clobbered by the directory parsing
                # (paths look like .../<zoom>/<row>/<col>.<ext>; assumes "/"
                # separators — TODO confirm on non-POSIX platforms)
                zoom_level, row = map(int, root.split("/")[-2:])
                col = int(file.split(".")[0])
                written_output_tiles.add(
                    mp.config.output_pyramid.tile(zoom_level, row, col)
                )
        written_process_tiles = {
            mp.config.process_pyramid.intersecting(t)[0]
            for t in written_output_tiles
        }

        # process tiles
        existing = set()
        not_existing = set()
        for tile, exists in tiles_exist(
            config=mp.config, process_tiles=process_tiles, multi=4
        ):
            if exists:
                existing.add(tile)
            else:
                not_existing.add(tile)
        assert existing == written_process_tiles
        assert not_existing
        assert set(process_tiles) == existing.union(not_existing)

        # output tiles
        existing = set()
        not_existing = set()
        for tile, exists in tiles_exist(
            config=mp.config, output_tiles=output_tiles, multi=1
        ):
            if exists:
                existing.add(tile)
            else:
                not_existing.add(tile)
        assert existing == written_output_tiles
        assert not_existing
        assert set(output_tiles) == existing.union(not_existing)
def test_tiles_exist_s3(gtiff_s3, mp_s3_tmpdir):
    """tiles_exist() must agree with per-tile existence checks on S3 output."""
    bounds = (0, 0, 10, 10)
    zoom = 5
    config = dict(
        gtiff_s3.dict,
        pyramid=dict(gtiff_s3.dict["pyramid"], metatiling=8),
        output=dict(gtiff_s3.dict["output"], metatiling=1),
    )
    with mapchete.open(config, bounds=bounds, mode="overwrite") as mp:
        # generate tile directory
        mp.batch_process(zoom=zoom)
        process_tiles = list(
            mp.config.process_pyramid.tiles_from_bounds(bounds, zoom)
        )
        output_tiles = list(
            mp.config.output_pyramid.tiles_from_bounds(bounds, zoom)
        )

        # manually check which tiles exist
        written_output_tiles = {
            t
            for t in output_tiles
            if mp.config.output_reader.tiles_exist(output_tile=t)
        }
        written_process_tiles = {
            mp.config.process_pyramid.intersecting(t)[0]
            for t in written_output_tiles
        }

        # process tiles
        existing, not_existing = set(), set()
        for tile, exists in tiles_exist(
            config=mp.config, process_tiles=process_tiles, multi=4
        ):
            (existing if exists else not_existing).add(tile)
        assert existing == written_process_tiles
        assert set(process_tiles) == existing.union(not_existing)

        # output tiles
        existing, not_existing = set(), set()
        for tile, exists in tiles_exist(
            config=mp.config, output_tiles=output_tiles, multi=1
        ):
            (existing if exists else not_existing).add(tile)
        assert existing == written_output_tiles
        assert set(output_tiles) == existing.union(not_existing)
def skip_tiles(self, tiles=None):
    """
    Quickly determine whether tiles can be skipped for processing.

    The skip value is True if process mode is 'continue' and process
    output already exists. In all other cases, skip is False.

    Parameters
    ----------
    tiles : list of process tiles

    Yields
    ------
    tuples : (tile, skip)
    """
    if self.config.mode == "continue":
        # in "continue" mode, existing output marks a tile as skippable
        yield from tiles_exist(config=self.config, process_tiles=tiles)
    else:
        # in every other mode, nothing is skipped
        yield from ((tile, False) for tile in tiles)
def cp(input_, output, zoom=None, area=None, area_crs=None, bounds=None,
       bounds_crs=None, wkt_geometry=None, overwrite=False, verbose=False,
       no_pbar=False, debug=False, logfile=None, multi=None, username=None,
       password=None):
    """Copy TileDirectory."""
    if zoom is None:  # pragma: no cover
        raise click.UsageError("zoom level(s) required")
    src_fs = fs_from_path(input_, username=username, password=password)
    dst_fs = fs_from_path(output, username=username, password=password)

    # open source tile directory
    with mapchete.open(
        input_,
        zoom=zoom,
        area=area,
        area_crs=area_crs,
        bounds=bounds,
        bounds_crs=bounds_crs,
        wkt_geometry=wkt_geometry,
        username=username,
        password=password,
        fs=src_fs,
    ) as src_mp:
        tp = src_mp.config.output_pyramid

        # copy metadata to destination if necessary
        src_metadata = os.path.join(input_, "metadata.json")
        dst_metadata = os.path.join(output, "metadata.json")
        if not dst_fs.exists(dst_metadata):
            logger.debug(f"copy {src_metadata} to {dst_metadata}")
            _copy(src_fs, src_metadata, dst_fs, dst_metadata)

        with mapchete.open(
            output,
            zoom=zoom,
            area=area,
            area_crs=area_crs,
            bounds=bounds,
            bounds_crs=bounds_crs,
            wkt_geometry=wkt_geometry,
            username=username,
            password=password,
            fs=dst_fs,
        ) as dst_mp:
            for z in range(min(zoom), max(zoom) + 1):
                click.echo(f"copy zoom {z}...")
                # materialize all tiles
                aoi_geom = src_mp.config.area_at_zoom(z)
                tiles = [
                    t
                    for t in tp.tiles_from_geom(aoi_geom, z)
                    # this is required to omit tiles touching the config area
                    if aoi_geom.intersection(t.bbox).area
                ]

                # check which source tiles exist
                logger.debug("looking for existing source tiles...")
                src_tiles_exist = dict(
                    tiles_exist(
                        config=src_mp.config, output_tiles=tiles, multi=multi
                    )
                )

                # check which destination tiles exist
                logger.debug("looking for existing destination tiles...")
                dst_tiles_exist = dict(
                    tiles_exist(
                        config=dst_mp.config, output_tiles=tiles, multi=multi
                    )
                )

                # copy, skipping missing sources and (unless overwriting)
                # already-present destinations
                for tile in tqdm.tqdm(
                    tiles, unit="tile", disable=debug or no_pbar
                ):
                    src_path = src_mp.config.output_reader.get_path(tile)
                    if not src_tiles_exist[tile]:
                        logger.debug(
                            f"{tile}: source tile ({src_path}) does not exist"
                        )
                        continue
                    if dst_tiles_exist[tile] and not overwrite:
                        logger.debug(f"{tile}: destination tile exists")
                        continue
                    dst_path = dst_mp.config.output_reader.get_path(tile)
                    logger.debug(f"{tile}: copy {src_path} to {dst_path}")
                    _copy(src_fs, src_path, dst_fs, dst_path)
def rm_(
    input_,
    zoom=None,
    area=None,
    area_crs=None,
    bounds=None,
    bounds_crs=None,
    wkt_geometry=None,
    multi=None,
    verbose=False,
    no_pbar=False,
    debug=False,
    logfile=None,
    force=None,
):
    """Remove tiles from TileDirectory."""
    # NOTE: docstring previously said "Copy TileDirectory." which was a
    # copy-paste error — this command deletes tiles.
    if zoom is None:  # pragma: no cover
        raise click.UsageError("zoom level(s) required")
    src_fs = fs_from_path(input_)

    # open source tile directory
    with mapchete.open(
        input_,
        zoom=zoom,
        area=area,
        area_crs=area_crs,
        bounds=bounds,
        bounds_crs=bounds_crs,
        wkt_geometry=wkt_geometry,
        fs=src_fs,
        mode="readonly",
    ) as src_mp:
        tp = src_mp.config.output_pyramid
        tiles = {}
        for z in range(min(zoom), max(zoom) + 1):
            tiles[z] = []
            # check which source tiles exist
            logger.debug(f"looking for existing source tiles in zoom {z}...")
            # hoist the per-zoom process area out of the tile comprehension
            # instead of recomputing it for every tile
            aoi_geom = src_mp.config.area_at_zoom(z)
            for tile, exists in tiles_exist(
                config=src_mp.config,
                output_tiles=[
                    t
                    for t in tp.tiles_from_geom(aoi_geom, z)
                    # this is required to omit tiles touching the config area
                    if aoi_geom.intersection(t.bbox).area
                ],
                multi=multi,
            ):
                if exists:
                    tiles[z].append(tile)

        total_tiles = sum(len(v) for v in tiles.values())
        if total_tiles:
            if force or click.confirm(
                f"Do you want to delete {total_tiles} tiles?", abort=True
            ):
                # remove
                rm(
                    [
                        src_mp.config.output_reader.get_path(tile)
                        for zoom_tiles in tiles.values()
                        for tile in zoom_tiles
                    ],
                    fs=src_fs,
                )
        else:  # pragma: no cover
            click.echo("No tiles found to delete.")
def _write_tile_to_indexes(tile, tile_path, index_writers):
    """Write tile into every index which has no entry for it yet."""
    # NOTE(review): assumes entry_exists() is a read-only lookup — confirm
    pending = [
        i for i in index_writers
        if not i.entry_exists(tile=tile, path=tile_path)
    ]
    if pending:
        logger.debug("%s exists", tile_path)
        logger.debug("write to %s indexes", len(pending))
        for index in pending:
            index.write(tile, tile_path)


def zoom_index_gen(
    mp=None,
    out_dir=None,
    zoom=None,
    geojson=False,
    gpkg=False,
    shapefile=False,
    txt=False,
    vrt=False,
    fieldname="location",
    basepath=None,
    for_gdal=True,
    threading=False,
):
    """
    Generate indexes for given zoom level.

    Parameters
    ----------
    mp : Mapchete object
        process output to be indexed
    out_dir : path
        optionally override process output directory
    zoom : int
        zoom level to be processed
    geojson : bool
        generate GeoJSON index (default: False)
    gpkg : bool
        generate GeoPackage index (default: False)
    shapefile : bool
        generate Shapefile index (default: False)
    txt : bool
        generate tile path list textfile (default: False)
    vrt : bool
        GDAL-style VRT file (default: False)
    fieldname : str
        field name which contains paths of tiles (default: "location")
    basepath : str
        if set, use custom base path instead of output path
    for_gdal : bool
        use GDAL compatible remote paths, i.e. add "/vsicurl/" before path
        (default: True)
    threading : bool
        currently unused; kept for backward compatibility

    Yields
    ------
    tile : indexed output tile (for progress information)
    """
    for zoom in get_zoom_levels(process_zoom_levels=zoom):
        with ExitStack() as es:
            # get index writers for all enabled formats
            index_writers = []
            for enabled, driver, extension in (
                (geojson, "GeoJSON", "geojson"),
                (gpkg, "GPKG", "gpkg"),
                (shapefile, "ESRI Shapefile", "shp"),
            ):
                if enabled:
                    index_writers.append(
                        es.enter_context(
                            VectorFileWriter(
                                driver=driver,
                                out_path=_index_file_path(
                                    out_dir, zoom, extension
                                ),
                                crs=mp.config.output_pyramid.crs,
                                fieldname=fieldname,
                            )
                        )
                    )
            if txt:
                index_writers.append(
                    es.enter_context(
                        TextFileWriter(
                            out_path=_index_file_path(out_dir, zoom, "txt")
                        )
                    )
                )
            if vrt:
                index_writers.append(
                    es.enter_context(
                        VRTFileWriter(
                            out_path=_index_file_path(out_dir, zoom, "vrt"),
                            output=mp.config.output,
                            out_pyramid=mp.config.output_pyramid,
                        )
                    )
                )
            logger.debug("use the following index writers: %s", index_writers)

            # all output tiles for given process area
            logger.debug("determine affected output tiles")
            # hoist the process area instead of recomputing it per tile
            aoi = mp.config.area_at_zoom(zoom)
            output_tiles = {
                tile
                for tile in mp.config.output_pyramid.tiles_from_geom(aoi, zoom)
                # this is required to omit tiles touching the config area
                if tile.bbox.intersection(aoi).area
            }

            # check which tiles exist in any index
            logger.debug("check which tiles exist in index(es)")
            existing_in_any_index = {
                tile
                for tile in output_tiles
                if any(
                    i.entry_exists(
                        tile=tile,
                        path=_tile_path(
                            orig_path=mp.config.output.get_path(tile),
                            basepath=basepath,
                            for_gdal=for_gdal,
                        ),
                    )
                    for i in index_writers
                )
            }
            logger.debug(
                "{}/{} tiles found in index(es)".format(
                    len(existing_in_any_index), len(output_tiles)
                )
            )

            # tiles which do not exist in any index: only index those whose
            # process output actually exists
            for tile, output_exists in tiles_exist(
                mp.config,
                output_tiles=output_tiles.difference(existing_in_any_index),
            ):
                tile_path = _tile_path(
                    orig_path=mp.config.output.get_path(tile),
                    basepath=basepath,
                    for_gdal=for_gdal,
                )
                if output_exists:
                    _write_tile_to_indexes(tile, tile_path, index_writers)
                # yield tile for progress information
                yield tile

            # tiles which exist in at least one index: fill in the indexes
            # still missing them
            for tile in existing_in_any_index:
                tile_path = _tile_path(
                    orig_path=mp.config.output.get_path(tile),
                    basepath=basepath,
                    for_gdal=for_gdal,
                )
                _write_tile_to_indexes(tile, tile_path, index_writers)
                # yield tile for progress information
                yield tile