def test_remote_database_cache(s3_db_factory, raster_file, monkeypatch):
    """The local copy of a remote (S3) DB must only be re-downloaded when the cache expires.

    Uses the mtime of the locally cached file to detect re-downloads, and pokes
    ``meta_store._last_updated`` directly to control cache expiry.
    """
    keys = ('some', 'keys')
    dbpath = s3_db_factory(keys)

    from terracotta import get_driver
    driver = get_driver(dbpath)
    # force the cache to be considered stale so the first connect downloads the DB
    driver.meta_store._last_updated = -float('inf')

    with driver.connect():
        assert driver.key_names == keys
        assert driver.get_datasets() == {}
        modification_date = os.path.getmtime(driver.meta_store._local_path)

        # overwrite the remote DB while a connection is open
        s3_db_factory(keys, datasets={('some', 'value'): str(raster_file)})

        # no change yet
        assert driver.get_datasets() == {}
        assert os.path.getmtime(driver.meta_store._local_path) == modification_date

    # check if remote db is cached correctly
    driver.meta_store._last_updated = time.time()

    with driver.connect():
        # db connection is cached; so still no change
        assert driver.get_datasets() == {}
        assert os.path.getmtime(driver.meta_store._local_path) == modification_date

    # invalidate cache
    driver.meta_store._last_updated = -float('inf')

    with driver.connect():
        # now db is updated on reconnect
        assert list(driver.get_datasets().keys()) == [('some', 'value')]
        assert os.path.getmtime(driver.meta_store._local_path) != modification_date
def test_compute_consistency(use_testdb, testdb, raster_file_xyz):
    """The /compute handler's 'v1 + v2' result must equal manually adding both tiles."""
    import terracotta
    from terracotta.xyz import get_tile_data
    from terracotta.handlers import compute
    from terracotta.image import to_uint8

    settings = terracotta.get_settings()

    # evaluate 'v1 + v2' over two datasets through the handler
    raw_img = compute.compute(
        'v1 + v2', ['val21', 'x'], {'v1': 'val22', 'v2': 'val23'},
        stretch_range=(0, 10000), tile_xyz=raster_file_xyz
    )
    img_data = np.asarray(Image.open(raw_img))
    assert img_data.shape == settings.DEFAULT_TILE_SIZE

    # fetch the same two tiles directly and add them ourselves
    driver = terracotta.get_driver(testdb)
    with driver.connect():
        v1 = get_tile_data(driver, ['val21', 'x', 'val22'], raster_file_xyz)
        v2 = get_tile_data(driver, ['val21', 'x', 'val23'], raster_file_xyz)

    # both paths must produce identical uint8 images under the same stretch
    np.testing.assert_array_equal(
        img_data,
        to_uint8(v1 + v2, 0, 10000)
    )
def test_ingest_append(raster_file, tmpworkdir):
    """Running `ingest` twice into the same SQLite file must append, not overwrite."""
    from terracotta.scripts import cli

    # stage two copies of the raster under distinct directories
    for relpath in ('dir1/img1.tif', 'dir2/img2.tif'):
        staged = tmpworkdir / relpath
        os.makedirs(staged.dirpath(), exist_ok=True)
        shutil.copy(raster_file, staged)

    outfile = tmpworkdir / 'out.sqlite'
    runner = CliRunner()

    # ingest each directory in turn against the same output database
    for pattern in ('dir1/{name}.tif', 'dir2/{name}.tif'):
        result = runner.invoke(cli.cli, ['ingest', pattern, '-o', str(outfile)])
        assert result.exit_code == 0
        assert outfile.check()

    from terracotta import get_driver
    driver = get_driver(str(outfile), provider='sqlite')
    assert driver.key_names == ('name', )

    # both rasters must be present after the second run
    datasets = driver.get_datasets()
    assert all((name, ) in datasets for name in ('img1', 'img2'))
def test_rgb_stretch(stretch_range, use_testdb, testdb, raster_file_xyz):
    """Check contrast stretching of the /rgb handler against the raw tile data.

    Values below the stretch range map to 1, values above to 255, and values in
    between must land in [1, 255]; masked (nodata) pixels map to 0.
    """
    import terracotta
    from terracotta.xyz import get_tile_data
    from terracotta.handlers import rgb

    ds_keys = ['val21', 'x', 'val22']

    # request an RGB image with the same stretch on all three bands;
    # only the first channel is inspected below
    raw_img = rgb.rgb(ds_keys[:2], ['val22', 'val23', 'val24'], raster_file_xyz,
                      stretch_ranges=[stretch_range] * 3)
    img_data = np.asarray(Image.open(raw_img))[..., 0]

    # get unstretched data to compare to
    driver = terracotta.get_driver(testdb)
    with driver.connect():
        tile_data = get_tile_data(driver, ds_keys, tile_xyz=raster_file_xyz,
                                  tile_size=img_data.shape)

    # filter transparent values
    valid_mask = ~tile_data.mask
    assert np.all(img_data[~valid_mask] == 0)

    valid_img = img_data[valid_mask]
    valid_data = tile_data.compressed()

    # values below the stretch minimum clip to the lowest non-transparent value (1)
    assert np.all(valid_img[valid_data < stretch_range[0]] == 1)

    # values strictly inside the stretch range stay within the valid uint8 band
    stretch_range_mask = (valid_data > stretch_range[0]) & (valid_data < stretch_range[1])
    assert np.all(valid_img[stretch_range_mask] >= 1)
    assert np.all(valid_img[stretch_range_mask] <= 255)

    # values above the stretch maximum clip to 255
    assert np.all(valid_img[valid_data > stretch_range[1]] == 255)
def benchmark_database(big_raster_file_nodata, big_raster_file_mask, tmpdir_factory):
    """Build a SQLite test database with four large rasters for benchmarking.

    Disables the raster cache so benchmarks measure actual tile reads.
    Returns the path of the created database file.
    """
    from terracotta import get_driver, update_settings

    keys = ['type', 'band']

    # ensure benchmarks hit the raster files instead of an in-memory cache
    update_settings(RASTER_CACHE_SIZE=0)

    dbpath = tmpdir_factory.mktemp('db').join('db-readonly.sqlite')
    driver = get_driver(dbpath, provider='sqlite')
    driver.create(keys)

    # metadata is computed once and reused for all inserts — including the
    # mask raster; presumably to skip expensive recomputation (TODO confirm
    # metadata accuracy is irrelevant for these benchmarks)
    mtd = driver.compute_metadata(str(big_raster_file_nodata))

    with driver.connect():
        driver.insert(['nodata', '1'], str(big_raster_file_nodata), metadata=mtd)
        driver.insert(['nodata', '2'], str(big_raster_file_nodata), metadata=mtd)
        driver.insert(['nodata', '3'], str(big_raster_file_nodata), metadata=mtd)
        driver.insert(['mask', '1'], str(big_raster_file_mask), metadata=mtd)

    return dbpath
def test_remote_database(s3_db_factory):
    """A driver pointed at a remote (S3) database must expose its key names."""
    from terracotta import get_driver

    expected_keys = ('some', 'keys')
    remote_path = s3_db_factory(expected_keys)

    driver = get_driver(remote_path)
    assert driver.key_names == expected_keys
def metadata(keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]:
    """Returns all metadata for a single dataset"""
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)

    response = driver.get_metadata(keys)
    # echo the requested keys back to the caller, preserving database key order
    response['keys'] = OrderedDict(zip(driver.key_names, keys))
    return response
def point(keys: str, lat: str, lng: str):
    """Return the point value at (lat, lng) for the dataset matching the given keys.

    ``keys`` is a slash-separated key string as it appears in the request path.
    """
    key_list = keys.split("/")

    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)

    with driver.connect():
        dataset = get_unique_dataset(driver, key_list)
        result = get_point_data(float(lat), float(lng), dataset[1])

    return jsonify(result)
def test_key_handler(testdb, use_testdb):
    """The /keys handler must report the database key names in order."""
    import terracotta
    from terracotta.handlers import keys

    response = keys.keys()
    assert response

    expected = terracotta.get_driver(str(testdb)).key_names
    assert tuple(entry['key'] for entry in response) == expected
def ingest(raster_pattern: RasterPatternType, output_file: Path,
           skip_metadata: bool = False, rgb_key: str = None,
           quiet: bool = False) -> None:
    """Ingest a collection of raster files into a (new or existing) SQLite database.

    First argument is a format pattern defining paths and keys of all raster files.

    Example:

        $ terracotta create-database /path/to/rasters/{name}/{date}_{band}{}.tif -o out.sqlite

    The empty group {} is replaced by a wildcard matching anything (similar to * in glob patterns).

    Existing datasets are silently overwritten.

    This command only supports the creation of a simple, local SQLite database without any
    additional metadata. For more sophisticated use cases use the Terracotta Python API.
    """
    from terracotta import get_driver

    keys, raster_files = raster_pattern

    if rgb_key is not None:
        if rgb_key not in keys:
            raise click.BadParameter('RGB key not found in raster pattern')

        # re-order keys so the RGB key comes last
        rgb_idx = keys.index(rgb_key)

        def push_to_last(seq: Sequence[Any], index: int) -> Tuple[Any, ...]:
            # move element at `index` to the end, keeping relative order of the rest
            return (*seq[:index], *seq[index + 1:], seq[index])

        keys = list(push_to_last(keys, rgb_idx))
        raster_files = {
            push_to_last(k, rgb_idx): v for k, v in raster_files.items()
        }

    driver = get_driver(output_file)

    if not output_file.is_file():
        driver.create(keys)

    if tuple(keys) != driver.key_names:
        click.echo(
            f'Database file {output_file!s} has incompatible key names {driver.key_names}',
            err=True
        )
        # BUG FIX: click.Abort() was previously instantiated but never raised,
        # so ingestion silently continued into an incompatible database
        raise click.Abort()

    with driver.connect():
        progress = tqdm.tqdm(raster_files.items(), desc='Ingesting raster files',
                             disable=quiet)
        for key, filepath in progress:
            driver.insert(key, filepath, skip_metadata=skip_metadata)
def _get_data():
    """Collect metadata for every dataset, grouped as ``{title: {year: metadata}}``."""
    driver = tc.get_driver(tc.get_settings().DRIVER_PATH)
    data = {}
    with driver.connect():
        for dataset_keys in driver.get_datasets():
            title, year = dataset_keys
            # setdefault replaces the non-idiomatic `if not title in data` check
            data.setdefault(title, {})[year] = driver.get_metadata(dataset_keys)
    return data
def test_colormap_consistency(use_testdb, testdb, raster_file_xyz, stretch_range, cmap_name):
    """Test consistency between /colormap and images returned by /singleband"""
    import terracotta
    from terracotta.xyz import get_tile_data
    from terracotta.handlers import singleband, colormap

    ds_keys = ['val21', 'x', 'val22']

    # get image with applied stretch and colormap
    raw_img = singleband.singleband(ds_keys, raster_file_xyz,
                                    stretch_range=stretch_range, colormap=cmap_name)
    img_data = np.asarray(Image.open(raw_img).convert('RGBA'))

    # get raw data to compare to
    driver = terracotta.get_driver(testdb)
    with driver.connect():
        tile_data = get_tile_data(driver, ds_keys, tile_xyz=raster_file_xyz,
                                  tile_size=img_data.shape[:2])

    # make sure all pixel values are included in colormap
    num_values = stretch_range[1] - stretch_range[0] + 1

    # get colormap for given stretch
    cmap = colormap.colormap(colormap=cmap_name, stretch_range=stretch_range,
                             num_values=num_values)
    # flatten rows into a {value: rgba} lookup (each row presumably has exactly
    # two entries, value and rgba — TODO confirm against /colormap handler)
    cmap = dict(row.values() for row in cmap)

    # test nodata
    nodata_mask = tile_data.mask
    assert np.all(img_data[nodata_mask, -1] == 0)

    # test clipping
    below_mask = tile_data < stretch_range[0]
    assert np.all(img_data[below_mask & ~nodata_mask, :-1] == cmap[stretch_range[0]][:-1])

    above_mask = tile_data > stretch_range[1]
    assert np.all(img_data[above_mask & ~nodata_mask, :-1] == cmap[stretch_range[1]][:-1])

    # test values inside stretch_range
    values_to_test = np.unique(tile_data.compressed())
    values_to_test = values_to_test[(values_to_test >= stretch_range[0])
                                    & (values_to_test <= stretch_range[1])]
    for val in values_to_test:
        rgba = cmap[val]
        assert np.all(img_data[tile_data == val] == rgba)
def singleband(keys: Union[Sequence[str], Mapping[str, str]],
               tile_xyz: Tuple[int, int, int] = None, *,
               colormap: Union[str, Mapping[Number, RGB], None] = None,
               stretch_range: Tuple[Number, Number] = None,
               tile_size: Tuple[int, int] = None) -> BinaryIO:
    """Return singleband image as PNG.

    If ``colormap`` is a mapping, pixel values are preserved and each label is
    painted with its supplied color; otherwise the tile is contrast-stretched
    (defaulting to the dataset's metadata range) and an optional named colormap
    is applied.
    """
    # BUG FIX: the deprecated alias collections.Mapping was removed in
    # Python 3.10; the ABC lives in collections.abc
    import collections.abc

    cmap_or_palette: Union[str, Sequence[RGB], None]

    if stretch_range is None:
        stretch_min, stretch_max = None, None
    else:
        stretch_min, stretch_max = stretch_range

    preserve_values = isinstance(colormap, collections.abc.Mapping)

    settings = get_settings()
    if tile_size is None:
        tile_size = settings.DEFAULT_TILE_SIZE

    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)

    with driver.connect():
        metadata = driver.get_metadata(keys)
        tile_data = xyz.get_tile_data(driver, keys, tile_xyz,
                                      tile_size=tile_size, preserve_values=preserve_values)

    if preserve_values:
        # bin output image into supplied labels, starting at 1
        colormap = cast(Mapping, colormap)
        labels, label_colors = list(colormap.keys()), list(colormap.values())
        cmap_or_palette = label_colors
        out = image.label(tile_data, labels)
    else:
        # determine stretch range from metadata and arguments
        stretch_range_ = list(metadata['range'])
        if stretch_min is not None:
            stretch_range_[0] = stretch_min
        if stretch_max is not None:
            stretch_range_[1] = stretch_max
        cmap_or_palette = cast(Optional[str], colormap)
        out = image.to_uint8(tile_data, *stretch_range_)

    return image.array_to_png(out, colormap=cmap_or_palette)
def test_key_handler_env_config(testdb, monkeypatch):
    """The /keys handler must pick up the driver path from the environment."""
    import terracotta
    from terracotta.handlers import keys

    # point terracotta at the test database via environment configuration
    monkeypatch.setenv('TC_DRIVER_PATH', str(testdb))
    terracotta.update_settings()

    driver = terracotta.get_driver(str(testdb))

    response = keys.keys()
    assert response
    assert tuple(item['key'] for item in response) == driver.key_names
def test_singleband_explicit_colormap(use_testdb, testdb, raster_file_xyz):
    """An explicit {value: rgba} colormap must be applied exactly by /singleband.

    Labels map to their colors, the nodata value stays transparent, and values
    not listed in the colormap become transparent.
    """
    import terracotta
    from terracotta.xyz import get_tile_data
    from terracotta.handlers import singleband

    ds_keys = ['val21', 'x', 'val22']
    nodata = 10000

    settings = terracotta.get_settings()
    driver = terracotta.get_driver(testdb)
    with driver.connect():
        tile_data = get_tile_data(driver, ds_keys, tile_xyz=raster_file_xyz,
                                  preserve_values=True,
                                  tile_size=settings.DEFAULT_TILE_SIZE)

    # Get some values from the raster to use for colormap
    classes = np.unique(tile_data)
    # cap at 254 classes; presumably leaves room for the nodata entry in the
    # 8-bit palette — TODO confirm against singleband handler limits
    classes = classes[:254]

    colormap = {}
    for i in range(classes.shape[0]):
        val = classes[i]
        color = val % 256
        colormap[val] = (color, color, color, color)

    colormap[nodata] = (100, 100, 100, 100)

    raw_img = singleband.singleband(ds_keys, raster_file_xyz, colormap=colormap)
    img_data = np.asarray(Image.open(raw_img).convert('RGBA'))

    # get unstretched data to compare to
    with driver.connect():
        tile_data = get_tile_data(driver, ds_keys, tile_xyz=raster_file_xyz,
                                  preserve_values=True, tile_size=img_data.shape[:2])

    # check that labels are mapped to colors correctly
    for cmap_label, cmap_color in colormap.items():
        if cmap_label == nodata:
            # make sure nodata is still transparent
            assert np.all(img_data[tile_data == cmap_label, -1] == 0)
        else:
            assert np.all(
                img_data[tile_data == cmap_label] == np.asarray(cmap_color))

    # check that all data outside of labels is transparent
    keys_arr = np.array(list(colormap.keys()), dtype=np.int16)
    assert np.all(img_data[~np.isin(tile_data, keys_arr), -1] == 0)
def test_datasets_handler(testdb, use_testdb):
    """The /datasets handler must return one key dict per dataset, keys in order."""
    import terracotta
    from terracotta.handlers import datasets

    driver = terracotta.get_driver(str(testdb))
    key_names = driver.key_names

    assert datasets.datasets()

    expected = [dict(zip(key_names, combo)) for combo in driver.get_datasets().keys()]
    assert datasets.datasets() == expected

    # check key order
    assert all(tuple(entry.keys()) == key_names for entry in datasets.datasets())
def use_non_writable_testdb(testdb, monkeypatch, raster_file):
    """Fixture: configure a test database, then make it non-writable for the test.

    Inserts a throwaway dataset first, flips the meta store's _WRITABLE flag off
    for the duration of the test, and cleans the dataset up afterwards (after the
    monkeypatch context has restored writability).
    """
    import terracotta
    terracotta.update_settings(DRIVER_PATH=str(testdb))

    driver = terracotta.get_driver(testdb)
    with driver.connect():
        driver.insert(('first', 'second', 'third'), str(raster_file), skip_metadata=True)

    with monkeypatch.context() as m:
        # simulate a read-only backend while the test runs
        m.setattr(driver.meta_store, "_WRITABLE", False)
        yield

    # _WRITABLE is restored here, so the cleanup delete succeeds
    driver.delete(('first', 'second', 'third'))
def testdb(GCBM_raster_files, tmpdir_factory):
    """A pre-populated Terracotta raster database. (TODO: Add metadata)"""
    from terracotta import get_driver

    keys = ['title', 'year']

    dbpath = tmpdir_factory.mktemp('db').join('db-readonly.sqlite')
    driver = get_driver(dbpath, provider='sqlite')
    driver.create(keys)

    with driver.connect():
        for raster in GCBM_raster_files:
            # NOTE(review): every raster is inserted under the same key tuple
            # ('val11', 'x'), so each insert overwrites the previous one and the
            # DB ends up with a single dataset — confirm this is intended
            driver.insert(('val11', 'x'), str(raster))

    return dbpath
def keys() -> List[Dict[str, str]]:
    """List available keys, in order"""
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)

    def as_row(name: str, description: str) -> Dict[str, str]:
        # omit the description field entirely when there is none
        row = {'key': name}
        if description:
            row['description'] = description
        return row

    return [as_row(name, desc) for name, desc in driver.get_keys().items()]
def datasets(some_keys: Mapping[str, str] = None,
             page: int = 0, limit: int = 500) -> 'List[OrderedDict[str, str]]':
    """List all available key combinations"""
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)

    with driver.connect():
        matches = driver.get_datasets(where=some_keys, page=page, limit=limit).keys()
        names = driver.key_names

    # pair every key combination with the ordered key names
    return [OrderedDict(zip(names, combo)) for combo in matches]
def test_immutability(s3_db_factory, raster_file):
    """All write operations on a remote (S3) database must be rejected."""
    from terracotta import get_driver

    db_keys = ('some', 'keys')
    dbpath = s3_db_factory(db_keys, datasets={('some', 'value'): str(raster_file)})
    driver = get_driver(dbpath)

    # create, insert, and delete must all raise in turn
    write_attempts = (
        lambda: driver.create(db_keys),
        lambda: driver.insert(('some', 'value'), str(raster_file)),
        lambda: driver.delete(('some', 'value')),
    )
    for attempt in write_attempts:
        with pytest.raises(exceptions.DatabaseNotWritableError):
            attempt()
def test_ingest(set_config, GCBM_raster_files, GCBM_compiled_output, tmpdir):
    """Ingestion must create a SQLite DB containing one dataset per raster file."""
    from taswira.scripts.ingestion import ingest
    from terracotta import get_driver

    set_config()

    raster_dir = GCBM_raster_files[0].dirname
    db_path = ingest(raster_dir, GCBM_compiled_output, tmpdir)
    assert os.path.exists(db_path)

    driver = get_driver(db_path, provider='sqlite')
    assert driver.key_names == ('title', 'year')
    assert len(driver.get_datasets()) == len(GCBM_raster_files)
def test_ingest(raster_file, tmpdir):
    """`terracotta ingest` must build a SQLite database from a raster pattern."""
    from terracotta import get_driver
    from terracotta.scripts import cli

    db_file = tmpdir / 'out.sqlite'
    pattern = str(raster_file.dirpath('{name}.tif'))

    invocation = CliRunner().invoke(cli.cli, ['ingest', pattern, '-o', str(db_file)])
    assert invocation.exit_code == 0
    assert db_file.check()

    driver = get_driver(str(db_file), provider='sqlite')
    assert driver.key_names == ('name', )
    assert driver.get_datasets() == {('img', ): str(raster_file)}
def test_destructor(s3_db_factory, raster_file, capsys):
    """The meta store destructor must remove the locally cached DB file cleanly."""
    keys = ('some', 'keys')
    dbpath = s3_db_factory(keys, datasets={('some', 'value'): str(raster_file)})

    from terracotta import get_driver
    driver = get_driver(dbpath)
    assert os.path.isfile(driver.meta_store._local_path)

    # invoke the destructor explicitly; the local cache file must be removed
    # without any 'Exception ignored' noise on stderr
    driver.meta_store.__del__()
    assert not os.path.isfile(driver.meta_store._local_path)

    captured = capsys.readouterr()
    assert 'Exception ignored' not in captured.err

    # re-create file to prevent actual destructor from failing
    with open(driver.meta_store._local_path, 'w'):
        pass
def ingest(rasterdir, db_results, outputdir, allow_unoptimized=False):
    """Ingest raster files into a Terracotta database.

    Args:
        rasterdir: Path to directory containing raster files.
        db_results: Path to DB containing non-spatial data.
        outputdir: Path to directory for saving the generated DB.
        allow_unoptimized: Should unoptimized raster files be processed?

    Returns:
        Path to generated DB.

    Raises:
        UnoptimizedRaster: If a raster is not a valid COG and
            ``allow_unoptimized`` is False.
    """
    driver = get_driver(os.path.join(outputdir, DB_NAME), provider='sqlite')
    driver.create(GCBM_RASTER_KEYS, GCBM_RASTER_KEYS_DESCRIPTION)

    # collect all raster files matching each configured pattern, rejecting
    # non-COG files unless explicitly allowed
    progress = tqdm.tqdm(get_config(), desc='Searching raster files')
    raster_files = []
    for config in progress:
        for file in glob.glob(rasterdir + os.sep + config['file_pattern']):
            if not is_valid_cog(file) and not allow_unoptimized:
                raise UnoptimizedRaster
            raster_files.append(dict(path=file, **config))

    with driver.connect():
        metadata = get_metadata(db_results)
        progress = tqdm.tqdm(raster_files, desc='Processing raster files')
        for raster in progress:
            # fall back to the database indicator name when no title is configured
            title = raster.get('title', raster['database_indicator'])
            year = _find_raster_year(raster['path'])
            unit = find_units(raster.get('graph_units'))
            # NOTE(review): raster.get('palette') may return None, making
            # .lower() raise AttributeError — confirm 'palette' is always set
            computed_metadata = driver.compute_metadata(
                raster['path'],
                extra_metadata={
                    'indicator_value': str(metadata[title][year]),
                    'colormap': raster.get('palette').lower(),
                    'unit': unit.value[2]
                })
            driver.insert((title, year), raster['path'], metadata=computed_metadata)

    return driver.path
def init_db(keys, key_descriptions=None, nuke=False):
    """(Re-)initialize the Terracotta database and ingest all GeoTIFFs.

    Args:
        keys: Ordered key names; each file's dot-separated stem supplies the
            corresponding key values.
        key_descriptions: Optional mapping of key name -> description, passed
            through to ``driver.create``.
        nuke: If True, delete any existing database file and start fresh.
    """
    driver = tc.get_driver(DRIVER_PATH)

    # Create an empty database if it doesn't exist
    os.makedirs(os.path.dirname(DRIVER_PATH), exist_ok=True)
    if nuke and os.path.isfile(DRIVER_PATH):
        os.remove(DRIVER_PATH)
    if not os.path.isfile(DRIVER_PATH):
        driver.create(keys=keys, key_descriptions=key_descriptions)

    # Insert the parameters.
    with driver.connect():
        for entry in os.listdir(GEOTIFF_DIR):
            # idiomatic suffix check (was: entry[-5:] == ".tiff")
            if not entry.endswith(".tiff"):
                continue
            tiff_path = os.path.join(GEOTIFF_DIR, entry)
            # map key names onto the dot-separated parts of the file stem;
            # enumerate (rather than zip) keeps the original IndexError on
            # filenames with more parts than keys
            tiff_dict = {
                keys[i]: value
                for i, value in enumerate(entry[:-5].split("."))
            }
            driver.insert(tiff_dict, tiff_path)
def test_bench_singleband(benchmark, zoom, resampling, big_raster_file_nodata,
                          benchmark_database):
    """Benchmark the /singleband endpoint at a given zoom and resampling method."""
    from terracotta.server import create_app
    from terracotta import update_settings, get_driver

    update_settings(DRIVER_PATH=str(benchmark_database),
                    RESAMPLING_METHOD=resampling, REPROJECTION_METHOD=resampling)

    # a zoom level of None selects the preview endpoint instead of an XYZ tile
    zoom_level = ZOOM_XYZ[zoom]

    flask_app = create_app()
    with flask_app.test_client() as client:
        if zoom_level is not None:
            x, y, z = get_xyz(big_raster_file_nodata, zoom_level)
            rv = benchmark(client.get, f'/singleband/nodata/1/{z}/{x}/{y}.png')
        else:
            rv = benchmark(client.get, '/singleband/nodata/1/preview.png')

    assert rv.status_code == 200

    # the benchmark DB was created with RASTER_CACHE_SIZE=0, so nothing may
    # have been cached during the benchmark
    assert not len(get_driver(str(benchmark_database))._raster_cache)
def check_integrity(zappa_cli):
    """Pre-deployment sanity check for a Zappa-deployed Terracotta instance.

    On `deploy`/`update`, verifies that the configured remote driver is valid,
    reachable, and that at least one raster tile can actually be read. Raises
    ValueError for bad configuration and RuntimeError for connection problems.
    """
    command = zappa_cli.command
    # only gate the commands that actually ship code
    if command not in ('deploy', 'update'):
        return

    env = zappa_cli.aws_environment_variables or {}

    db_provider = env.get('TC_DRIVER_PROVIDER')
    if db_provider not in REMOTE_DRIVERS:
        raise ValueError(
            f'TC_DRIVER_PROVIDER environment variable must be one of {REMOTE_DRIVERS}'
        )

    db_path = env.get('TC_DRIVER_PATH')
    if not db_path:
        raise ValueError('TC_DRIVER_PATH environment variable must be set')

    try:
        from terracotta import get_driver, exceptions
    except ImportError as exc:
        raise RuntimeError(
            'Terracotta must be installed before deployment (e.g. via `pip install .`)'
        ) from exc

    # make the deployment environment visible to the driver for this process
    os.environ.update(env)
    driver = get_driver(db_path, provider=db_provider)

    # this checks if DB is reachable, readable, and whether its version matches
    # NOTE(review): next() raises StopIteration if the remote DB is empty —
    # confirm an empty database is not a supported deployment state
    try:
        with driver.connect():
            some_dataset = next(iter(driver.get_datasets().keys()))
    except exceptions.InvalidDatabaseError as exc:
        raise RuntimeError(
            'Error while connecting to remote database. Please double-check your AWS environment '
            'variables, and make sure your machine has access to the remote Terracotta database.'
        ) from exc

    # this makes sure that a random raster file is readable
    with driver.connect():
        driver.get_raster_tile(some_dataset)
def _s3_db_factory(keys, datasets=None):
    """Build a SQLite Terracotta DB with the given keys/datasets and upload it to S3.

    Args:
        keys: Key names for ``driver.create``.
        datasets: Optional mapping of key tuple -> raster path to insert.

    Returns:
        The ``s3://`` path of the uploaded database.
    """
    from terracotta import get_driver

    with tempfile.TemporaryDirectory() as tmpdir:
        dbfile = Path(tmpdir) / 'tc.sqlite'
        driver = get_driver(dbfile)
        driver.create(keys)

        if datasets:
            # BUG FIX: the loop variable was previously also named `keys`,
            # shadowing and clobbering the function argument
            for dataset_keys, path in datasets.items():
                driver.insert(dataset_keys, path)

        # read the DB bytes while the temp dir still exists
        with open(dbfile, 'rb') as f:
            db_bytes = f.read()

    conn = boto3.resource('s3')
    conn.create_bucket(Bucket=bucketname)

    s3 = boto3.client('s3')
    s3.put_object(Bucket=bucketname, Key='tc.sqlite', Body=db_bytes)

    return f's3://{bucketname}/tc.sqlite'
def testdb(raster_file, tmpdir_factory):
    """A read-only, pre-populated test database"""
    from terracotta import get_driver

    keys = ['key1', 'akey', 'key2']
    key_descriptions = {
        'key2': 'key2'
    }

    dbpath = tmpdir_factory.mktemp('db').join('db-readonly.sqlite')
    driver = get_driver(dbpath, provider='sqlite')
    driver.create(keys, key_descriptions=key_descriptions)

    # NOTE(review): extra_metadata is passed as a list here, not a dict —
    # confirm compute_metadata accepts arbitrary JSON-serializable values
    metadata = driver.compute_metadata(str(raster_file), extra_metadata=['extra_data'])

    with driver.connect():
        # only the first dataset carries the precomputed metadata; the others
        # let the driver compute it on insert
        driver.insert(('val11', 'x', 'val12'), str(raster_file), metadata=metadata)
        driver.insert(('val21', 'x', 'val22'), str(raster_file))
        driver.insert(('val21', 'x', 'val23'), str(raster_file))
        driver.insert(('val21', 'x', 'val24'), str(raster_file))

    return dbpath