class BaseBackend(BaseReader): """Base Class for cogeo-mosaic backend storage.""" path: str = attr.ib() mosaic_def: MosaicJSON = attr.ib(default=None) reader: Type[BaseReader] = attr.ib(default=COGReader) reader_options: Dict = attr.ib(factory=dict) backend_options: Dict = attr.ib(factory=dict) # TMS is outside the init because mosaicJSON and cogeo-mosaic only # works with WebMercator (mercantile) for now. tms: TileMatrixSet = attr.ib(init=False, default=WEB_MERCATOR_TMS) _backend_name: str _file_byte_size: Optional[int] = 0 @mosaic_def.validator def _check_mosaic_def(self, attribute, value): if value is not None: self.mosaic_def = MosaicJSON(**dict(value)) def __attrs_post_init__(self): """Post Init: if not passed in init, try to read from self.path.""" self.mosaic_def = self.mosaic_def or self._read(**self.backend_options) self.minzoom = self.mosaic_def.minzoom self.maxzoom = self.mosaic_def.maxzoom self.bounds = self.mosaic_def.bounds @property def center(self): """Return center from the mosaic definition.""" return self.mosaic_def.center def info(self, quadkeys: bool = False) -> Info: # type: ignore """Mosaic info.""" return Info( bounds=self.mosaic_def.bounds, center=self.mosaic_def.center, maxzoom=self.mosaic_def.maxzoom, minzoom=self.mosaic_def.minzoom, name=self.mosaic_def.name if self.mosaic_def.name else "mosaic", quadkeys=[] if not quadkeys else self._quadkeys, ) @property def _quadkeys(self) -> List[str]: """Return the list of quadkey tiles.""" return list(self.mosaic_def.tiles) def stats(self): """PlaceHolder for BaseReader.stats.""" raise NotImplementedError @property def metadata(self) -> Metadata: # type: ignore """Retrieve Mosaic metadata Returns ------- MosaicJSON as dict without `tiles` key. """ return Metadata(**self.mosaic_def.dict()) def assets_for_tile(self, x: int, y: int, z: int) -> List[str]: """Retrieve assets for tile.""" return self.get_assets(x, y, z) def assets_for_point(self, lng: float, lat: float) -> List[str]: """Retrieve assets for point.""" tile = mercantile.tile(lng, lat, self.quadkey_zoom) return self.get_assets(tile.x, tile.y, tile.z) @cached( TTLCache(maxsize=cache_config.maxsize, ttl=cache_config.ttl), key=lambda self, x, y, z: hashkey(self.path, x, y, z), ) def get_assets(self, x: int, y: int, z: int) -> List[str]: """Find assets.""" mercator_tile = mercantile.Tile(x=x, y=y, z=z) quadkeys = find_quadkeys(mercator_tile, self.quadkey_zoom) return list( itertools.chain.from_iterable( [self.mosaic_def.tiles.get(qk, []) for qk in quadkeys])) def tile( # type: ignore self, x: int, y: int, z: int, reverse: bool = False, **kwargs: Any, ) -> Tuple[ImageData, List[str]]: """Get Tile from multiple observation.""" assets = self.assets_for_tile(x, y, z) if not assets: raise NoAssetFoundError(f"No assets found for tile {z}-{x}-{y}") if reverse: assets = list(reversed(assets)) def _reader(asset: str, x: int, y: int, z: int, **kwargs: Any) -> ImageData: with self.reader(asset, **self.reader_options) as src_dst: return src_dst.tile(x, y, z, **kwargs) return mosaic_reader(assets, _reader, x, y, z, **kwargs) def point( self, lon: float, lat: float, threads=MAX_THREADS, reverse: bool = False, **kwargs: Any, ) -> List[Dict]: """Get Point value from multiple observation.""" assets = self.assets_for_point(lon, lat) if not assets: raise NoAssetFoundError(f"No assets found for point ({lon},{lat})") if reverse: assets = list(reversed(assets)) def _reader(asset: str, lon: float, lat: float, **kwargs) -> Dict: with self.reader(asset, **self.reader_options) as src_dst: return src_dst.point(lon, lat, **kwargs) tasks = create_tasks(_reader, assets, threads, lon, lat, **kwargs) return [{ "asset": asset, "values": pt } for pt, asset in filter_tasks( tasks, allowed_exceptions=(PointOutsideBounds, ))] def preview(self): """PlaceHolder for BaseReader.preview.""" raise NotImplementedError def part(self): """PlaceHolder for BaseReader.part.""" raise NotImplementedError def feature(self): """PlaceHolder for BaseReader.feature.""" raise NotImplementedError @abc.abstractmethod def _read(self) -> MosaicJSON: """Fetch mosaic definition""" @property def mosaicid(self) -> str: """Return sha224 id of the mosaicjson document.""" return get_hash(**self.mosaic_def.dict(exclude_none=True)) @property def quadkey_zoom(self) -> int: """Return Quadkey zoom property.""" return self.mosaic_def.quadkey_zoom or self.mosaic_def.minzoom @abc.abstractmethod def write(self, overwrite: bool = True): """Upload new MosaicJSON to backend.""" def update( self, features: Sequence[Dict], add_first: bool = True, quiet: bool = False, **kwargs, ): """Update existing MosaicJSON on backend.""" new_mosaic = self.mosaic_def.from_features( features, self.mosaic_def.minzoom, self.mosaic_def.maxzoom, quadkey_zoom=self.quadkey_zoom, quiet=quiet, **kwargs, ) for quadkey, new_assets in new_mosaic.tiles.items(): tile = mercantile.quadkey_to_tile(quadkey) assets = self.assets_for_tile(*tile) assets = [*new_assets, *assets ] if add_first else [*assets, *new_assets] # add custom sorting algorithm (e.g based on path name) self.mosaic_def.tiles[quadkey] = assets bounds = bbox_union(new_mosaic.bounds, self.mosaic_def.bounds) self.mosaic_def._increase_version() self.mosaic_def.bounds = bounds self.mosaic_def.center = ( (bounds[0] + bounds[2]) / 2, (bounds[1] + bounds[3]) / 2, self.mosaic_def.minzoom, ) # We only write if path is set if self.path: self.write(overwrite=True) return
class DynamoDBBackend(BaseBackend): """DynamoDB Backend Adapter.""" def __init__( self, table_name: str, mosaic_def: Optional[Union[MosaicJSON, Dict]] = None, region: str = os.getenv("AWS_REGION", "us-east-1"), client: Optional[Any] = None, ): """Initialize DynamoDBBackend.""" self.client = client or boto3.resource("dynamodb", region_name=region) self.table = self.client.Table(table_name) self.path = f"dynamodb://{region}/{table_name}" if mosaic_def is not None: self.mosaic_def = MosaicJSON(**dict(mosaic_def)) else: self.mosaic_def = self._read() def tile(self, x: int, y: int, z: int) -> List[str]: """Retrieve assets for tile.""" return self.get_assets(x, y, z) def point(self, lng: float, lat: float) -> List[str]: """Retrieve assets for point.""" tile = mercantile.tile(lng, lat, self.quadkey_zoom) return self.get_assets(tile.x, tile.y, tile.z) @property def _quadkeys(self) -> List[str]: """Return the list of quadkey tiles.""" warnings.warn( "Performing full scan operation might be slow and expensive on large database." ) resp = self.table.scan( ProjectionExpression="quadkey") # TODO: Add pagination return [qk["quadkey"] for qk in resp["Items"] if qk["quadkey"] != "-1"] def write(self): """Write mosaicjson document to AWS DynamoDB.""" self._create_table() items = self._create_items() self._write_items(items) def _update_quadkey(self, quadkey: str, dataset: List[str]): """Update quadkey list.""" self.table.put_item(Item={"quadkey": quadkey, "assets": dataset}) def _update_metadata(self): """Update bounds and center.""" meta = json.loads(json.dumps(self.metadata), parse_float=Decimal) meta["quadkey"] = "-1" self.table.put_item(Item=meta) def update( self, features: Sequence[Dict], add_first: bool = True, quiet: bool = False, **kwargs, ): """Update existing MosaicJSON on backend.""" new_mosaic = self.mosaic_def.from_features( features, self.mosaic_def.minzoom, self.mosaic_def.maxzoom, quadkey_zoom=self.quadkey_zoom, quiet=quiet, **kwargs, ) fout = os.devnull if quiet else sys.stderr with click.progressbar( # type: ignore new_mosaic.tiles.items(), file=fout, show_percent=True) as items: for quadkey, new_assets in items: tile = mercantile.quadkey_to_tile(quadkey) assets = self.tile(*tile) assets = [*new_assets, *assets ] if add_first else [*assets, *new_assets] # add custom sorting algorithm (e.g based on path name) self._update_quadkey(quadkey, assets) bounds = bbox_union(new_mosaic.bounds, self.mosaic_def.bounds) self.mosaic_def._increase_version() self.mosaic_def.bounds = bounds self.mosaic_def.center = ( (bounds[0] + bounds[2]) / 2, (bounds[1] + bounds[3]) / 2, self.mosaic_def.minzoom, ) self._update_metadata() return def _create_table(self, billing_mode: str = "PAY_PER_REQUEST"): # Define schema for primary key # Non-keys don't need a schema attr_defs = [{"AttributeName": "quadkey", "AttributeType": "S"}] key_schema = [{"AttributeName": "quadkey", "KeyType": "HASH"}] # Note: errors if table already exists try: self.client.create_table( AttributeDefinitions=attr_defs, TableName=self.table.table_name, KeySchema=key_schema, BillingMode=billing_mode, ) # If outside try/except block, could wait forever if unable to # create table self.table.wait_until_exists() except self.client.exceptions.ResourceInUseException: warnings.warn("Unable to create table, may already exist") return def _create_items(self) -> List[Dict]: items = [] # Create one metadata item with quadkey=-1 # Convert float to decimal # https://blog.ruanbekker.com/blog/2019/02/05/convert-float-to-decimal-data-types-for-boto3-dynamodb-using-python/ meta = json.loads(json.dumps(self.metadata), parse_float=Decimal) # NOTE: quadkey is a string type meta["quadkey"] = "-1" items.append(meta) for quadkey, assets in self.mosaic_def.tiles.items(): item = {"quadkey": quadkey, "assets": assets} items.append(item) return items def _write_items(self, items: List[Dict]): with self.table.batch_writer() as batch: with click.progressbar(items, length=len(items), show_percent=True) as progitems: for item in progitems: batch.put_item(item) @functools.lru_cache(maxsize=512) def _read(self) -> MosaicJSON: # type: ignore """Get Mosaic definition info.""" meta = self._fetch_dynamodb("-1") # Numeric values are loaded from DynamoDB as Decimal types # Convert maxzoom, minzoom, quadkey_zoom to float/int for key in ["minzoom", "maxzoom", "quadkey_zoom"]: if meta.get(key): meta[key] = int(meta[key]) # Convert bounds, center to float/int for key in ["bounds", "center"]: if meta.get(key): meta[key] = list(map(float, meta[key])) # Create pydantic class # For now, a tiles key must exist meta["tiles"] = {} return MosaicJSON(**meta) @functools.lru_cache(maxsize=512) def get_assets(self, x: int, y: int, z: int) -> List[str]: """Find assets.""" mercator_tile = mercantile.Tile(x=x, y=y, z=z) quadkeys = find_quadkeys(mercator_tile, self.quadkey_zoom) assets = list( itertools.chain.from_iterable([ self._fetch_dynamodb(qk).get("assets", []) for qk in quadkeys ])) # Find mosaics recursively? return assets def _fetch_dynamodb(self, quadkey: str) -> Dict: return self.table.get_item(Key={"quadkey": quadkey}).get("Item", {})