def test_optimize_rasters_small(tiny_raster_file, tmpdir):
    """Optimizing a tiny raster keeps it valid and leaves pixel data untouched."""
    from terracotta.cog import validate
    from terracotta.scripts import cli

    source_path = str(tiny_raster_file)
    optimized = tmpdir / tiny_raster_file.basename

    cli_args = ['optimize-rasters', source_path, '-o', str(tmpdir)]
    invocation = CliRunner().invoke(cli.cli, cli_args)
    assert invocation.exit_code == 0, format_exception(invocation)
    assert optimized.check()

    # Both files must validate: small rasters need no overviews,
    # so the unmodified input already qualifies as well.
    assert validate(source_path)
    assert validate(str(optimized))

    # Pixel-level comparison between input and optimized output.
    with rasterio.open(source_path) as before, rasterio.open(str(optimized)) as after:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'invalid value encountered.*')
            np.testing.assert_array_equal(before.read(), after.read())
def test_optimize_rasters(unoptimized_raster_file, tmpdir, in_memory, reproject, compression, nproc):
    """optimize-rasters yields valid COGs under various CLI flag combinations."""
    from terracotta.cog import validate
    from terracotta.scripts import cli

    pattern = str(unoptimized_raster_file.dirpath('*.tif'))
    optimized = tmpdir / unoptimized_raster_file.basename

    # Translate the parametrized options into CLI flags.
    extra_flags = ['--compression', compression, '-q']
    if in_memory is not None:
        extra_flags.append('--in-memory' if in_memory else '--no-in-memory')
    if reproject:
        extra_flags.append('--reproject')
    if nproc is not None:
        extra_flags.append(f'--nproc={nproc}')

    invocation = CliRunner().invoke(
        cli.cli, ['optimize-rasters', pattern, '-o', str(tmpdir)] + extra_flags
    )
    assert invocation.exit_code == 0, format_exception(invocation)
    assert optimized.check()

    # The input is not cloud-optimized; the output must be.
    assert not validate(str(unoptimized_raster_file))
    assert validate(str(optimized))

    if reproject:
        # Reprojection resamples the data, so an exact comparison is meaningless.
        return

    # Pixel-level comparison between input and optimized output.
    with rasterio.open(str(unoptimized_raster_file)) as before, \
            rasterio.open(str(optimized)) as after:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'invalid value encountered.*')
            np.testing.assert_array_equal(before.read(), after.read())
def test_validate_optimized_small(tmpdir):
    """A small raster without tiling or overviews still validates as a COG.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0 and
    the "random" raster was all zeros.
    """
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    # Random uint16 data in [0, 1000).
    raster_data = (1000 * np.random.rand(128, 128)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1])

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

    assert cog.validate(outfile)
def test_validate_not_gtiff(tmpdir):
    """A non-GeoTIFF file (PNG driver) must fail COG validation.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0.
    """
    from terracotta import cog

    outfile = str(tmpdir / 'raster.png')
    # Random uint16 data in [0, 1000).
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1],
                   driver='PNG')

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

    assert not cog.validate(outfile)
def test_validate_not_tiled(tmpdir):
    """A raster with overviews but no internal tiling must fail COG validation.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0.
    """
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    # Random uint16 data in [0, 1000).
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1])

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)
        overviews = [2 ** j for j in range(1, 4)]
        dst.build_overviews(overviews, Resampling.nearest)

    assert not cog.validate(outfile)
def test_validate_no_overviews(tmpdir):
    """A large tiled raster without overviews must fail COG validation.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0.
    """
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    # Random uint16 data in [0, 1000); 1024x1024 is large enough to require overviews.
    raster_data = (1000 * np.random.rand(1024, 1024)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1],
                   tiled=True, blockxsize=256, blockysize=256)

    with rasterio.open(outfile, 'w', **profile) as dst:
        dst.write(raster_data, 1)

    assert not cog.validate(outfile)
def test_validate_optimized(tmpdir):
    """A tiled raster with overviews, written via copy_src_overviews, validates.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0.
    """
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    # Random uint16 data in [0, 1000).
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1],
                   tiled=True, blockxsize=256, blockysize=256)

    # Build the raster in memory first so the final copy can place the
    # overviews before the main IFDs, as a valid COG requires.
    with MemoryFile() as mf, mf.open(**profile) as dst:
        dst.write(raster_data, 1)
        overviews = [2 ** j for j in range(1, 4)]
        dst.build_overviews(overviews, Resampling.nearest)
        copy(dst, outfile, copy_src_overviews=True, **profile)

    assert cog.validate(outfile)
def test_validate_external_overview(tmpdir):
    """A raster with external (.ovr) overviews must fail COG validation.

    Fix: scale *before* casting. ``np.random.rand`` yields floats in [0, 1),
    so applying ``astype(np.uint16)`` first truncated every sample to 0.
    """
    import os
    from terracotta import cog

    outfile = str(tmpdir / 'raster.tif')
    # Random uint16 data in [0, 1000).
    raster_data = (1000 * np.random.rand(512, 512)).astype(np.uint16)

    profile = BASE_PROFILE.copy()
    profile.update(height=raster_data.shape[0], width=raster_data.shape[1],
                   tiled=True, blockxsize=256, blockysize=256)

    # TIFF_USE_OVR forces GDAL to write overviews to a sidecar .ovr file
    # instead of embedding them.
    with rasterio.Env(TIFF_USE_OVR=True):
        with rasterio.open(outfile, 'w', **profile) as dst:
            dst.write(raster_data, 1)
            overviews = [2 ** j for j in range(1, 4)]
            dst.build_overviews(overviews, Resampling.nearest)

    assert os.path.isfile(f'{outfile}.ovr')
    assert not cog.validate(outfile)
def compute_metadata(cls, raster_path: str, *,
                     extra_metadata: Any = None,
                     use_chunks: bool = None,
                     max_shape: Sequence[int] = None) -> Dict[str, Any]:
    """Read given raster file and compute metadata from it.

    This handles most of the heavy lifting during raster ingestion.
    The returned metadata can be passed directly to :meth:`insert`.

    Arguments:

        raster_path: Path to GDAL-readable raster file
        extra_metadata: Any additional metadata to attach to the dataset.
            Will be JSON-serialized and returned verbatim by :meth:`get_metadata`.
        use_chunks: Whether to process the image in chunks (slower, but uses less memory).
            If not given, use chunks for large images only.
        max_shape: Gives the maximum number of pixels used in each dimension to compute
            metadata. Setting this to a relatively small size such as ``(1024, 1024)``
            will result in much faster metadata computation for large images, at the
            expense of inaccurate results.
    """
    # Imported lazily so rasterio is only required when metadata is computed.
    import rasterio
    from rasterio import warp
    from terracotta.cog import validate

    row_data: Dict[str, Any] = {}
    extra_metadata = extra_metadata or {}

    if max_shape is not None and len(max_shape) != 2:
        raise ValueError('max_shape argument must contain 2 values')

    # use_chunks and max_shape are mutually exclusive strategies for
    # bounding memory use.
    if use_chunks and max_shape is not None:
        raise ValueError('Cannot use both use_chunks and max_shape arguments')

    with rasterio.Env(**cls._RIO_ENV_KEYS):
        # Non-COG files still work, but every later read will be slow —
        # warn once at ingestion time.
        if not validate(raster_path):
            warnings.warn(
                f'Raster file {raster_path} is not a valid cloud-optimized GeoTIFF. '
                'Any interaction with it will be significantly slower. Consider optimizing '
                'it through `terracotta optimize-rasters` before ingestion.',
                exceptions.PerformanceWarning, stacklevel=3
            )

        with rasterio.open(raster_path) as src:
            # Without a nodata value or alpha band there is no way to mask
            # invalid pixels.
            if src.nodata is None and not cls._has_alpha_band(src):
                warnings.warn(
                    f'Raster file {raster_path} does not have a valid nodata value, '
                    'and does not contain an alpha band. No data will be masked.'
                )

            # Bounds are always stored in WGS84 lat/lon.
            bounds = warp.transform_bounds(
                src.crs, 'epsg:4326', *src.bounds, densify_pts=21
            )

            # Default chunking decision: chunk only large images.
            if use_chunks is None and max_shape is None:
                use_chunks = src.width * src.height > RasterDriver._LARGE_RASTER_THRESHOLD

                if use_chunks:
                    logger.debug(
                        f'Computing metadata for file {raster_path} using more than '
                        f'{RasterDriver._LARGE_RASTER_THRESHOLD // 10**6}M pixels, iterating '
                        'over chunks'
                    )

            # Chunked statistics require the optional crick dependency;
            # fall back to whole-file processing if it is missing.
            if use_chunks and not has_crick:
                warnings.warn(
                    'Processing a large raster file, but crick failed to import. '
                    'Reading whole file into memory instead.', exceptions.PerformanceWarning
                )
                use_chunks = False

            if use_chunks:
                raster_stats = RasterDriver._compute_image_stats_chunked(src)
            else:
                raster_stats = RasterDriver._compute_image_stats(src, max_shape)

    # raster_stats is None when every pixel is masked/nodata.
    if raster_stats is None:
        raise ValueError(f'Raster file {raster_path} does not contain any valid data')

    row_data.update(raster_stats)
    row_data['bounds'] = bounds
    row_data['metadata'] = extra_metadata
    return row_data
def compute_metadata(path: str, *,
                     extra_metadata: Any = None,
                     use_chunks: bool = None,
                     max_shape: Sequence[int] = None,
                     large_raster_threshold: int = None,
                     rio_env_options: Dict[str, Any] = None) -> Dict[str, Any]:
    """Read given raster file and compute metadata from it.

    Arguments:

        path: Path to GDAL-readable raster file
        extra_metadata: Any additional metadata to attach to the dataset.
            Returned verbatim under the ``metadata`` key.
        use_chunks: Whether to process the image in chunks (slower, but uses less memory).
            If not given, chunking is enabled for images larger than
            ``large_raster_threshold`` pixels.
        max_shape: Maximum number of pixels used in each dimension to compute metadata.
            Trades accuracy for speed on large images. Mutually exclusive with
            ``use_chunks``.
        large_raster_threshold: Pixel count above which chunked processing is used
            when ``use_chunks`` is not given. If None, chunking is never auto-enabled.
        rio_env_options: Keyword arguments forwarded to :class:`rasterio.Env`.
    """
    # Imported lazily so rasterio is only required when metadata is computed.
    import rasterio
    from rasterio import warp
    from terracotta.cog import validate

    row_data: Dict[str, Any] = {}
    extra_metadata = extra_metadata or {}

    if max_shape is not None and len(max_shape) != 2:
        raise ValueError('max_shape argument must contain 2 values')

    # use_chunks and max_shape are mutually exclusive strategies for
    # bounding memory use.
    if use_chunks and max_shape is not None:
        raise ValueError('Cannot use both use_chunks and max_shape arguments')

    if rio_env_options is None:
        rio_env_options = {}

    with rasterio.Env(**rio_env_options):
        # Non-COG files still work, but every later read will be slow —
        # warn once at ingestion time.
        if not validate(path):
            warnings.warn(
                f'Raster file {path} is not a valid cloud-optimized GeoTIFF. '
                'Any interaction with it will be significantly slower. Consider optimizing '
                'it through `terracotta optimize-rasters` before ingestion.',
                exceptions.PerformanceWarning, stacklevel=3)

        with rasterio.open(path) as src:
            # Without a nodata value or alpha band there is no way to mask
            # invalid pixels.
            if src.nodata is None and not has_alpha_band(src):
                warnings.warn(
                    f'Raster file {path} does not have a valid nodata value, '
                    'and does not contain an alpha band. No data will be masked.'
                )

            # Bounds are always stored in WGS84 lat/lon.
            bounds = warp.transform_bounds(src.crs, 'epsg:4326', *src.bounds,
                                           densify_pts=21)

            # Default chunking decision: chunk only images above the threshold.
            if use_chunks is None and max_shape is None and large_raster_threshold is not None:
                use_chunks = src.width * src.height > large_raster_threshold

                if use_chunks:
                    logger.debug(
                        f'Computing metadata for file {path} using more than '
                        f'{large_raster_threshold // 10**6}M pixels, iterating '
                        'over chunks')

            # Chunked statistics require the optional crick dependency;
            # fall back to whole-file processing if it is missing.
            if use_chunks and not has_crick:
                warnings.warn(
                    'Processing a large raster file, but crick failed to import. '
                    'Reading whole file into memory instead.', exceptions.PerformanceWarning)
                use_chunks = False

            if use_chunks:
                raster_stats = compute_image_stats_chunked(src)
            else:
                raster_stats = compute_image_stats(src, max_shape)

    # raster_stats is None when every pixel is masked/nodata.
    if raster_stats is None:
        raise ValueError(f'Raster file {path} does not contain any valid data')

    row_data.update(raster_stats)
    row_data['bounds'] = bounds
    row_data['metadata'] = extra_metadata
    return row_data