Exemple #1
0
class BaseBackend(BaseReader):
    """Base Class for cogeo-mosaic backend storage."""

    path: str = attr.ib()
    mosaic_def: MosaicJSON = attr.ib(default=None)
    reader: Type[BaseReader] = attr.ib(default=COGReader)
    reader_options: Dict = attr.ib(factory=dict)
    backend_options: Dict = attr.ib(factory=dict)

    # TMS is outside the init because mosaicJSON and cogeo-mosaic only
    # works with WebMercator (mercantile) for now.
    tms: TileMatrixSet = attr.ib(init=False, default=WEB_MERCATOR_TMS)

    _backend_name: str
    _file_byte_size: Optional[int] = 0

    @mosaic_def.validator
    def _check_mosaic_def(self, attribute, value):
        if value is not None:
            self.mosaic_def = MosaicJSON(**dict(value))

    def __attrs_post_init__(self):
        """Post Init: if not passed in init, try to read from self.path."""
        self.mosaic_def = self.mosaic_def or self._read(**self.backend_options)

        self.minzoom = self.mosaic_def.minzoom
        self.maxzoom = self.mosaic_def.maxzoom
        self.bounds = self.mosaic_def.bounds

    @property
    def center(self):
        """Return center from the mosaic definition."""
        return self.mosaic_def.center

    def info(self, quadkeys: bool = False) -> Info:  # type: ignore
        """Mosaic info."""
        return Info(
            bounds=self.mosaic_def.bounds,
            center=self.mosaic_def.center,
            maxzoom=self.mosaic_def.maxzoom,
            minzoom=self.mosaic_def.minzoom,
            name=self.mosaic_def.name if self.mosaic_def.name else "mosaic",
            quadkeys=[] if not quadkeys else self._quadkeys,
        )

    @property
    def _quadkeys(self) -> List[str]:
        """Return the list of quadkey tiles."""
        return list(self.mosaic_def.tiles)

    def stats(self):
        """PlaceHolder for BaseReader.stats."""
        raise NotImplementedError

    @property
    def metadata(self) -> Metadata:  # type: ignore
        """Retrieve Mosaic metadata

        Returns
        -------
        MosaicJSON as dict without `tiles` key.

        """
        return Metadata(**self.mosaic_def.dict())

    def assets_for_tile(self, x: int, y: int, z: int) -> List[str]:
        """Retrieve assets for tile."""
        return self.get_assets(x, y, z)

    def assets_for_point(self, lng: float, lat: float) -> List[str]:
        """Retrieve assets for point."""
        tile = mercantile.tile(lng, lat, self.quadkey_zoom)
        return self.get_assets(tile.x, tile.y, tile.z)

    @cached(
        TTLCache(maxsize=cache_config.maxsize, ttl=cache_config.ttl),
        key=lambda self, x, y, z: hashkey(self.path, x, y, z),
    )
    def get_assets(self, x: int, y: int, z: int) -> List[str]:
        """Find assets."""
        mercator_tile = mercantile.Tile(x=x, y=y, z=z)
        quadkeys = find_quadkeys(mercator_tile, self.quadkey_zoom)
        return list(
            itertools.chain.from_iterable(
                [self.mosaic_def.tiles.get(qk, []) for qk in quadkeys]))

    def tile(  # type: ignore
        self,
        x: int,
        y: int,
        z: int,
        reverse: bool = False,
        **kwargs: Any,
    ) -> Tuple[ImageData, List[str]]:
        """Get Tile from multiple observation."""
        assets = self.assets_for_tile(x, y, z)
        if not assets:
            raise NoAssetFoundError(f"No assets found for tile {z}-{x}-{y}")

        if reverse:
            assets = list(reversed(assets))

        def _reader(asset: str, x: int, y: int, z: int,
                    **kwargs: Any) -> ImageData:
            with self.reader(asset, **self.reader_options) as src_dst:
                return src_dst.tile(x, y, z, **kwargs)

        return mosaic_reader(assets, _reader, x, y, z, **kwargs)

    def point(
        self,
        lon: float,
        lat: float,
        threads=MAX_THREADS,
        reverse: bool = False,
        **kwargs: Any,
    ) -> List[Dict]:
        """Get Point value from multiple observation."""
        assets = self.assets_for_point(lon, lat)
        if not assets:
            raise NoAssetFoundError(f"No assets found for point ({lon},{lat})")

        if reverse:
            assets = list(reversed(assets))

        def _reader(asset: str, lon: float, lat: float, **kwargs) -> Dict:
            with self.reader(asset, **self.reader_options) as src_dst:
                return src_dst.point(lon, lat, **kwargs)

        tasks = create_tasks(_reader, assets, threads, lon, lat, **kwargs)
        return [{
            "asset": asset,
            "values": pt
        } for pt, asset in filter_tasks(
            tasks, allowed_exceptions=(PointOutsideBounds, ))]

    def preview(self):
        """PlaceHolder for BaseReader.preview."""
        raise NotImplementedError

    def part(self):
        """PlaceHolder for BaseReader.part."""
        raise NotImplementedError

    def feature(self):
        """PlaceHolder for BaseReader.feature."""
        raise NotImplementedError

    @abc.abstractmethod
    def _read(self) -> MosaicJSON:
        """Fetch mosaic definition"""

    @property
    def mosaicid(self) -> str:
        """Return sha224 id of the mosaicjson document."""
        return get_hash(**self.mosaic_def.dict(exclude_none=True))

    @property
    def quadkey_zoom(self) -> int:
        """Return Quadkey zoom property."""
        return self.mosaic_def.quadkey_zoom or self.mosaic_def.minzoom

    @abc.abstractmethod
    def write(self, overwrite: bool = True):
        """Upload new MosaicJSON to backend."""

    def update(
        self,
        features: Sequence[Dict],
        add_first: bool = True,
        quiet: bool = False,
        **kwargs,
    ):
        """Update existing MosaicJSON on backend."""
        new_mosaic = self.mosaic_def.from_features(
            features,
            self.mosaic_def.minzoom,
            self.mosaic_def.maxzoom,
            quadkey_zoom=self.quadkey_zoom,
            quiet=quiet,
            **kwargs,
        )

        for quadkey, new_assets in new_mosaic.tiles.items():
            tile = mercantile.quadkey_to_tile(quadkey)
            assets = self.assets_for_tile(*tile)
            assets = [*new_assets, *assets
                      ] if add_first else [*assets, *new_assets]

            # add custom sorting algorithm (e.g based on path name)
            self.mosaic_def.tiles[quadkey] = assets

        bounds = bbox_union(new_mosaic.bounds, self.mosaic_def.bounds)

        self.mosaic_def._increase_version()
        self.mosaic_def.bounds = bounds
        self.mosaic_def.center = (
            (bounds[0] + bounds[2]) / 2,
            (bounds[1] + bounds[3]) / 2,
            self.mosaic_def.minzoom,
        )

        # We only write if path is set
        if self.path:
            self.write(overwrite=True)

        return
Exemple #2
0
class DynamoDBBackend(BaseBackend):
    """DynamoDB Backend Adapter."""
    def __init__(
        self,
        table_name: str,
        mosaic_def: Optional[Union[MosaicJSON, Dict]] = None,
        region: str = os.getenv("AWS_REGION", "us-east-1"),
        client: Optional[Any] = None,
    ):
        """Initialize DynamoDBBackend."""
        self.client = client or boto3.resource("dynamodb", region_name=region)
        self.table = self.client.Table(table_name)
        self.path = f"dynamodb://{region}/{table_name}"

        if mosaic_def is not None:
            self.mosaic_def = MosaicJSON(**dict(mosaic_def))
        else:
            self.mosaic_def = self._read()

    def tile(self, x: int, y: int, z: int) -> List[str]:
        """Retrieve assets for tile."""
        return self.get_assets(x, y, z)

    def point(self, lng: float, lat: float) -> List[str]:
        """Retrieve assets for point."""
        tile = mercantile.tile(lng, lat, self.quadkey_zoom)
        return self.get_assets(tile.x, tile.y, tile.z)

    @property
    def _quadkeys(self) -> List[str]:
        """Return the list of quadkey tiles."""
        warnings.warn(
            "Performing full scan operation might be slow and expensive on large database."
        )
        resp = self.table.scan(
            ProjectionExpression="quadkey")  # TODO: Add pagination
        return [qk["quadkey"] for qk in resp["Items"] if qk["quadkey"] != "-1"]

    def write(self):
        """Write mosaicjson document to AWS DynamoDB."""
        self._create_table()
        items = self._create_items()
        self._write_items(items)

    def _update_quadkey(self, quadkey: str, dataset: List[str]):
        """Update quadkey list."""
        self.table.put_item(Item={"quadkey": quadkey, "assets": dataset})

    def _update_metadata(self):
        """Update bounds and center."""
        meta = json.loads(json.dumps(self.metadata), parse_float=Decimal)
        meta["quadkey"] = "-1"
        self.table.put_item(Item=meta)

    def update(
        self,
        features: Sequence[Dict],
        add_first: bool = True,
        quiet: bool = False,
        **kwargs,
    ):
        """Update existing MosaicJSON on backend."""
        new_mosaic = self.mosaic_def.from_features(
            features,
            self.mosaic_def.minzoom,
            self.mosaic_def.maxzoom,
            quadkey_zoom=self.quadkey_zoom,
            quiet=quiet,
            **kwargs,
        )

        fout = os.devnull if quiet else sys.stderr
        with click.progressbar(  # type: ignore
                new_mosaic.tiles.items(),
                file=fout,
                show_percent=True) as items:
            for quadkey, new_assets in items:
                tile = mercantile.quadkey_to_tile(quadkey)
                assets = self.tile(*tile)
                assets = [*new_assets, *assets
                          ] if add_first else [*assets, *new_assets]

                # add custom sorting algorithm (e.g based on path name)
                self._update_quadkey(quadkey, assets)

        bounds = bbox_union(new_mosaic.bounds, self.mosaic_def.bounds)

        self.mosaic_def._increase_version()
        self.mosaic_def.bounds = bounds
        self.mosaic_def.center = (
            (bounds[0] + bounds[2]) / 2,
            (bounds[1] + bounds[3]) / 2,
            self.mosaic_def.minzoom,
        )

        self._update_metadata()

        return

    def _create_table(self, billing_mode: str = "PAY_PER_REQUEST"):
        # Define schema for primary key
        # Non-keys don't need a schema
        attr_defs = [{"AttributeName": "quadkey", "AttributeType": "S"}]
        key_schema = [{"AttributeName": "quadkey", "KeyType": "HASH"}]

        # Note: errors if table already exists
        try:
            self.client.create_table(
                AttributeDefinitions=attr_defs,
                TableName=self.table.table_name,
                KeySchema=key_schema,
                BillingMode=billing_mode,
            )

            # If outside try/except block, could wait forever if unable to
            # create table
            self.table.wait_until_exists()
        except self.client.exceptions.ResourceInUseException:
            warnings.warn("Unable to create table, may already exist")
            return

    def _create_items(self) -> List[Dict]:
        items = []
        # Create one metadata item with quadkey=-1
        # Convert float to decimal
        # https://blog.ruanbekker.com/blog/2019/02/05/convert-float-to-decimal-data-types-for-boto3-dynamodb-using-python/
        meta = json.loads(json.dumps(self.metadata), parse_float=Decimal)

        # NOTE: quadkey is a string type
        meta["quadkey"] = "-1"
        items.append(meta)

        for quadkey, assets in self.mosaic_def.tiles.items():
            item = {"quadkey": quadkey, "assets": assets}
            items.append(item)

        return items

    def _write_items(self, items: List[Dict]):
        with self.table.batch_writer() as batch:
            with click.progressbar(items, length=len(items),
                                   show_percent=True) as progitems:
                for item in progitems:
                    batch.put_item(item)

    @functools.lru_cache(maxsize=512)
    def _read(self) -> MosaicJSON:  # type: ignore
        """Get Mosaic definition info."""
        meta = self._fetch_dynamodb("-1")

        # Numeric values are loaded from DynamoDB as Decimal types
        # Convert maxzoom, minzoom, quadkey_zoom to float/int
        for key in ["minzoom", "maxzoom", "quadkey_zoom"]:
            if meta.get(key):
                meta[key] = int(meta[key])

        # Convert bounds, center to float/int
        for key in ["bounds", "center"]:
            if meta.get(key):
                meta[key] = list(map(float, meta[key]))

        # Create pydantic class
        # For now, a tiles key must exist
        meta["tiles"] = {}
        return MosaicJSON(**meta)

    @functools.lru_cache(maxsize=512)
    def get_assets(self, x: int, y: int, z: int) -> List[str]:
        """Find assets."""
        mercator_tile = mercantile.Tile(x=x, y=y, z=z)
        quadkeys = find_quadkeys(mercator_tile, self.quadkey_zoom)

        assets = list(
            itertools.chain.from_iterable([
                self._fetch_dynamodb(qk).get("assets", []) for qk in quadkeys
            ]))

        # Find mosaics recursively?
        return assets

    def _fetch_dynamodb(self, quadkey: str) -> Dict:
        return self.table.get_item(Key={"quadkey": quadkey}).get("Item", {})