def query(uri, layer_name, layer_zoom=None, query_geom=None, time_intervals=None,
          query_proj=None, num_partitions=None):
    """Queries a single, zoom layer from a GeoTrellis catalog given spatial and/or time
    parameters.

    Note:
        The whole layer could still be read in if ``intersects`` and/or ``time_intervals``
        have not been set, or if the queried region contains the entire layer.

    Args:
        uri (str): The Uniform Resource Identifier used to point towards the desired
            GeoTrellis catalog to be read from. The shape of this string varies depending
            on backend.
        layer_name (str): The name of the GeoTrellis catalog to be queried.
        layer_zoom (int, optional): The zoom level of the layer that is to be queried.
            If ``None``, then the ``layer_zoom`` will be set to 0.
        query_geom (bytes or shapely.geometry or :class:`~geopyspark.geotrellis.Extent`, Optional):
            The desired spatial area to be returned. Can either be a shapely geometry, an
            instance of ``Extent``, or a WKB version of the geometry.

            Note:
                Not all shapely geometries are supported. The following are the types that
                are supported:

                * Point
                * Polygon
                * MultiPolygon

            Note:
                Only layers that were made from spatial, singleband GeoTiffs can query a
                ``Point``. All other types are restricted to ``Polygon`` and
                ``MultiPolygon``.

            Note:
                If the queried region does not intersect the layer, then an empty layer
                will be returned.

            If not specified, then the entire layer will be read.
        time_intervals (``[datetime.datetime]``, optional): A list of the time intervals
            to query. This parameter is only used when querying spatial-temporal data.
            The default value is, ``None``. If ``None``, then only the spatial area will
            be queried.
        query_proj (int or str, optional): The crs of the queried geometry if it is
            different than the layer it is being filtered against. If they are different
            and this is not set, then the returned ``TiledRasterLayer`` could contain
            incorrect values. If ``None``, then the geometry and layer are assumed to be
            in the same projection.
        num_partitions (int, optional): Sets RDD partition count when reading from catalog.

    Returns:
        :class:`~geopyspark.geotrellis.layer.TiledRasterLayer`

    Raises:
        TypeError: If ``query_geom`` is not ``None``, an ``Extent``, a supported shapely
            geometry, or WKB ``bytes``.
    """

    pysc = get_spark_context()
    layer_zoom = layer_zoom or 0

    # Normalize query_geom to WKB bytes (or None) before handing it to the JVM.
    if query_geom is None:
        pass  # pass as Null to Java
    elif isinstance(query_geom, Extent):
        query_geom = shapely.wkb.dumps(query_geom.to_polygon)
    elif isinstance(query_geom, (Polygon, MultiPolygon, Point)):
        query_geom = shapely.wkb.dumps(query_geom)
    elif isinstance(query_geom, bytes):
        pass  # assume bytes are WKB
    else:
        raise TypeError("Could not query intersection", query_geom)

    if isinstance(query_proj, int):
        query_proj = str(query_proj)

    if time_intervals:
        # Build a NEW list instead of mutating the caller's list in place.
        # Timezone-aware datetimes are converted to UTC; naive datetimes are
        # assumed to already be in UTC.
        time_intervals = [
            time.astimezone(pytz.utc).isoformat() if time.tzinfo
            else time.replace(tzinfo=pytz.utc).isoformat()
            for time in time_intervals
        ]
    else:
        time_intervals = []

    reader = pysc._gateway.jvm.geopyspark.geotrellis.io.LayerReaderWrapper(pysc._jsc.sc())
    srdd = reader.query(uri, layer_name, layer_zoom, query_geom,
                        time_intervals, query_proj, num_partitions)

    layer_type = LayerType._from_key_name(srdd.keyClassName())
    return TiledRasterLayer(layer_type, srdd)
def get(layer_type, uri, crs=None, max_tile_size=DEFAULT_MAX_TILE_SIZE,
        num_partitions=None, chunk_size=DEFAULT_CHUNK_SIZE,
        partition_bytes=DEFAULT_PARTITION_BYTES, time_tag=DEFAULT_GEOTIFF_TIME_TAG,
        time_format=DEFAULT_GEOTIFF_TIME_FORMAT, delimiter=None,
        s3_client=DEFAULT_S3_CLIENT, s3_credentials=None):
    """Creates a ``RasterLayer`` from GeoTiffs that are located on the local file system,
    ``HDFS``, or ``S3``.

    Args:
        layer_type (str or :class:`~geopyspark.geotrellis.constants.LayerType`): What the
            layer type of the geotiffs are. This is represented by either constants within
            ``LayerType`` or by a string.

            Note:
                All of the GeoTiffs must have the same spatial type.
        uri (str or [str]): The path or list of paths to the desired tile(s)/directory(ies).
        crs (str or int, optional): The CRS that the output tiles should be in. If ``None``,
            then the CRS that the tiles were originally in will be used.
        max_tile_size (int or None, optional): The max size of each tile in the resulting
            Layer. If the size is smaller than the read in tile, then that tile will be
            broken into smaller sections of the given size. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`. If ``None``,
            then the whole tile will be read in.
        num_partitions (int, optional): The number of partitions Spark will make when the
            data is repartitioned. If ``None``, then the data will not be repartitioned.

            Note:
                If ``max_tile_size`` is also specified then this parameter will be ignored.
        partition_bytes (int, optional): The desired number of bytes per partition. This is
            will ensure that at least one item is assigned for each partition. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_PARTITION_BYTES`.
        chunk_size (int, optional): How many bytes of the file should be read in at a time.
            Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_CHUNK_SIZE`.
        time_tag (str, optional): The name of the tiff tag that contains the time stamp for
            the tile. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_TAG`.
        time_format (str, optional): The pattern of the time stamp to be parsed. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_FORMAT`.
        delimiter (str, optional): The delimiter to use for S3 object listings.

            Note:
                This parameter will only be used when reading from S3.
        s3_client (str, optional): Which ``S3Client`` to use when reading GeoTiffs from S3.
            There are currently two options: ``default`` and ``mock``. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.
        s3_credentials(:class:`~geopyspark.geotrellis.s3.Credentials`, optional): Alternative
            Amazon S3 credentials to use when accessing the tile(s).

    Returns:
        :class:`~geopyspark.geotrellis.layer.RasterLayer`

    Raises:
        RuntimeError: ``s3_credentials`` were specified but the specified ``uri`` was
            not S3-based.
    """
    # Snapshot the non-None keyword arguments; this dict (minus the keys popped
    # below) is forwarded to the JVM reader as its options map.
    inputs = {k: v for k, v in locals().items() if v is not None}

    pysc = get_spark_context()
    geotiff_rdd = pysc._gateway.jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    key = LayerType(inputs.pop('layer_type'))._key_name(False)
    partition_bytes = str(inputs.pop('partition_bytes'))

    uri = inputs.pop('uri')
    uris = (uri if isinstance(uri, list) else [uri])

    try:
        s3_credentials = inputs.pop('s3_credentials')
    except KeyError:
        s3_credentials = None
    else:
        # NOTE(review): when ``uri`` is a list this passes the list through;
        # confirm _validate_s3_credentials accepts lists as well as strings.
        _validate_s3_credentials(uri, s3_credentials)

    # BUG FIX: derive the URI scheme from the first normalized URI. The
    # original called ``uri.split(":")`` directly, which raises
    # AttributeError whenever a list of paths is supplied.
    uri_type = uris[0].split(":")[0]

    with set_s3_credentials(s3_credentials, uri_type):
        srdd = geotiff_rdd.get(pysc._jsc.sc(), key, uris, inputs, partition_bytes)

    return RasterLayer(layer_type, srdd)
def query(layer_type, uri, layer_name, layer_zoom=None, query_geom=None,
          time_intervals=None, query_proj=None, options=None, num_partitions=None,
          **kwargs):
    """Queries a single, zoom layer from a GeoTrellis catalog given spatial and/or time
    parameters. Unlike read, this method will only return part of the layer that
    intersects the specified region.

    Note:
        The whole layer could still be read in if ``intersects`` and/or
        ``time_intervals`` have not been set, or if the queried region contains the
        entire layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the
            spatial type of the geotiffs are. This is represented by either constants
            within ``LayerType`` or by a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired
            GeoTrellis catalog to be read from. The shape of this string varies depending
            on backend.
        layer_name (str): The name of the GeoTrellis catalog to be queried.
        layer_zoom (int, optional): The zoom level of the layer that is to be queried.
            If ``None``, then the ``layer_zoom`` will be set to 0.
        query_geom (bytes or shapely.geometry or :class:`~geopyspark.geotrellis.Extent`, Optional):
            The desired spatial area to be returned. Can either be a shapely geometry, an
            instance of ``Extent``, or a WKB version of the geometry.

            Note:
                Not all shapely geometries are supported. The following are the types
                that are supported:

                * Point
                * Polygon
                * MultiPolygon

            Note:
                Only layers that were made from spatial, singleband GeoTiffs can query a
                ``Point``. All other types are restricted to ``Polygon`` and
                ``MultiPolygon``.

            If not specified, then the entire layer will be read.
        time_intervals (``[datetime.datetime]``, optional): A list of the time intervals
            to query. This parameter is only used when querying spatial-temporal data.
            The default value is, ``None``. If ``None``, then only the spatial area will
            be queried.
        query_proj (int or str, optional): The crs of the queried geometry if it is
            different than the layer it is being filtered against. If they are different
            and this is not set, then the returned ``TiledRasterLayer`` could contain
            incorrect values. If ``None``, then the geometry and layer are assumed to be
            in the same projection.
        options (dict, optional): Additional parameters for querying the tile for
            specific backends. The dictionary is only used for ``Cassandra`` and
            ``HBase``, no other backend requires this to be set.
        num_partitions (int, optional): Sets RDD partition count when reading from
            catalog.
        **kwargs: The optional parameters can also be set as keywords arguments. The
            keywords must be in camel case. If both options and keywords are set, then
            the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`

    Raises:
        TypeError: If ``query_geom`` is set but is not an ``Extent``, a supported
            shapely geometry, or WKB ``bytes``.
    """

    options = options or kwargs or {}
    layer_zoom = layer_zoom or 0

    pysc = get_spark_context()

    _construct_catalog(pysc, uri, options)
    cached = _mapped_cached[uri]

    key = map_key_input(LayerType(layer_type).value, True)
    num_partitions = num_partitions or pysc.defaultMinPartitions

    if not query_geom:
        # No spatial filter: read the whole layer at this zoom.
        srdd = cached.reader.read(key, layer_name, layer_zoom, num_partitions)
        return TiledRasterLayer(layer_type, srdd)

    if time_intervals:
        # CONSISTENCY FIX: treat naive datetimes as UTC (matching the other
        # catalog query helper) instead of letting astimezone() interpret
        # them as local time.
        time_intervals = [
            time.astimezone(pytz.utc).isoformat() if time.tzinfo
            else time.replace(tzinfo=pytz.utc).isoformat()
            for time in time_intervals
        ]
    else:
        time_intervals = []

    query_proj = query_proj or ""

    if isinstance(query_proj, int):
        query_proj = str(query_proj)

    # Normalize the geometry to WKB bytes so a single reader call can be used
    # for every supported geometry form.
    if isinstance(query_geom, (Polygon, MultiPolygon, Point)):
        wkb_geom = shapely.wkb.dumps(query_geom)
    elif isinstance(query_geom, Extent):
        wkb_geom = shapely.wkb.dumps(query_geom.to_polygon)
    elif isinstance(query_geom, bytes):
        wkb_geom = query_geom
    else:
        raise TypeError("Could not query intersection", query_geom)

    srdd = cached.reader.query(key, layer_name, layer_zoom, wkb_geom,
                               time_intervals, query_proj, num_partitions)
    return TiledRasterLayer(layer_type, srdd)
def get(layer_type, uri, crs=None, max_tile_size=None, num_partitions=None,
        chunk_size=None, time_tag=None, time_format=None, s3_client=None):
    """Creates a ``RasterLayer`` from GeoTiffs that are located on the local file
    system, ``HDFS``, or ``S3``.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the
            spatial type of the geotiffs are. This is represented by either constants
            within ``LayerType`` or by a string.

            Note:
                All of the GeoTiffs must have the same spatial type.
        uri (str): The path to a given file/directory.
        crs (str, optional): The CRS that the output tiles should be in. The CRS must be
            in the well-known name format. If ``None``, then the CRS that the tiles were
            originally in will be used.
        max_tile_size (int, optional): The max size of each tile in the resulting Layer.
            If the size is smaller than a read in tile, then that tile will be broken
            into tiles of the specified size. If ``None``, then the whole tile will be
            read in.
        num_partitions (int, optional): The number of repartitions Spark will make when
            the data is repartitioned. If ``None``, then the data will not be
            repartitioned.
        chunk_size (int, optional): How many bytes of the file should be read in at a
            time. If ``None``, then files will be read in 65536 byte chunks.
        time_tag (str, optional): The name of the tiff tag that contains the time stamp
            for the tile. If ``None``, then the default value is: ``TIFFTAG_DATETIME``.
        time_format (str, optional): The pattern of the time stamp for
            java.time.format.DateTimeFormatter to parse. If ``None``, then the default
            value is: ``yyyy:MM:dd HH:mm:ss``.
        s3_client (str, optional): Which ``S3Client`` to use when reading GeoTiffs from
            S3. There are currently two options: ``default`` and ``mock``. If ``None``,
            ``default`` is used.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.RasterLayer`
    """

    # Collect every argument that was actually supplied; this becomes the
    # options map handed to the JVM reader.
    supplied = {name: value for name, value in locals().items() if value is not None}

    pysc = get_spark_context()
    geotiff_rdd = pysc._gateway.jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    key = map_key_input(LayerType(supplied.pop('layer_type')).value, False)

    # The JVM side always expects a list of URIs; wrap a single path.
    source = supplied.pop('uri')
    sources = source if isinstance(source, list) else [source]

    srdd = geotiff_rdd.get(pysc._jsc.sc(), key, sources, supplied)

    return RasterLayer(layer_type, srdd)
def read_value(layer_type, uri, layer_name, layer_zoom, col, row, zdt=None,
               options=None, **kwargs):
    """Reads a single ``Tile`` from a GeoTrellis catalog.
    Unlike other functions in this module, this will not return a ``TiledRasterLayer``,
    but rather a GeoPySpark formatted raster. This is the function to use when creating
    a tile server.

    Note:
        When requesting a tile that does not exist, ``None`` will be returned.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`): What the
            spatial type of the geotiffs are. This is represented by either constants
            within ``LayerType`` or by a string.
        uri (str): The Uniform Resource Identifier used to point towards the desired
            GeoTrellis catalog to be read from. The shape of this string varies depending
            on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        col (int): The col number of the tile within the layout. Cols run east to west.
        row (int): The row number of the tile within the layout. Row run north to south.
        zdt (``datetime.datetime``): The time stamp of the tile if the data is
            spatial-temporal. This is represented as a ``datetime.datetime.`` instance.
            The default value is, ``None``. If ``None``, then only the spatial area will
            be queried.
        options (dict, optional): Additional parameters for reading the tile for
            specific backends. The dictionary is only used for ``Cassandra`` and
            ``HBase``, no other backend requires this to be set.
        **kwargs: The optional parameters can also be set as keywords arguments. The
            keywords must be in camel case. If both options and keywords are set, then
            the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Tile`
    """

    if not _in_bounds(layer_type, uri, layer_name, layer_zoom, col, row):
        # Requested tile lies outside the layer's extent.
        return None

    options = options or kwargs or {}

    if zdt:
        # CONSISTENCY FIX: treat a naive datetime as UTC (matching the module's
        # query helpers) rather than letting astimezone() assume local time.
        if zdt.tzinfo:
            zdt = zdt.astimezone(pytz.utc).isoformat()
        else:
            zdt = zdt.replace(tzinfo=pytz.utc).isoformat()
    else:
        zdt = ''

    if uri not in _mapped_cached:
        _construct_catalog(get_spark_context(), uri, options)

    cached = _mapped_cached[uri]

    key = map_key_input(LayerType(layer_type).value, True)

    values = cached.value_reader.readTile(key, layer_name, layer_zoom, col, row, zdt)

    return multibandtile_decoder(values)