def zfactor_calculator(mapped_zfactors):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    Unlike the ``ZFactorCalculator`` produced in
    :meth:`~geopyspark.geotrellis.zfactor_lat_lng_calculator`, the resulting
    ``ZFactorCalculator`` can be used on ``Tile``\s in a different projection.
    However, it cannot be used between different types of projections. For
    example, a ``ZFactorCalculator`` produced for a Layer that is in
    ``WebMercator`` will not create an accurate ``ZFactor`` for a Layer that
    is in ``LatLng``.

    Args:
        mapped_zfactors (dict): A ``dict`` that maps latitudes to
            ``ZFactor``\s. It is not required to supply a mapping for every
            latitude intersected in the layer. Rather, based on the latitudes
            given, a linear interpolation will be performed and any latitude
            not mapped will have its ``ZFactor`` derived from that
            interpolation.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()
    string_map = {str(k): str(v) for k, v in mapped_zfactors.items()}

    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
        ZFactorCalculator.createZFactorCalculator(json.dumps(string_map))

    return calculator
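# Usage sketch (illustrative, not from the original source). Assumes an active
# SparkContext and that `tiled_layer` is a TiledRasterLayer supporting a
# slope calculation that accepts a calculator; the latitude-to-zfactor mapping
# below is made up:
#
#     calc = zfactor_calculator({0.0: 0.00000898, 30.0: 0.00001036, 60.0: 0.00001792})
#     slope_layer = tiled_layer.slope(calc)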
def __init__(self, uri):
    self.uri = uri
    pysc = get_spark_context()

    try:
        self.wrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.AttributeStoreWrapper(uri)
    except Py4JJavaError as err:
        raise ValueError(err.java_exception.getMessage())
def rasterize(geoms, crs, zoom, fill_value, cell_type=CellType.FLOAT64,
              options=None, num_partitions=None):
    """Rasterizes Shapely geometries.

    Args:
        geoms ([shapely.geometry]): List of Shapely geometries to rasterize.
        crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        fill_value (int or float): Value to burn into pixels intersecting the
            geometry.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`):
            Which data type the cells should be when created. Defaults to
            ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`): Pixel
            intersection options.
        num_partitions (int, optional): The number of partitions of the
            resulting layer.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(crs, int):
        crs = str(crs)

    pysc = get_spark_context()
    wkb_geoms = [shapely.wkb.dumps(g) for g in geoms]

    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeGeometry(
        pysc._jsc.sc(), wkb_geoms, crs, zoom, float(fill_value),
        CellType(cell_type).value, options, num_partitions)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
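# Usage sketch (illustrative, not from the original source): rasterize a unit
# square in EPSG:4326 into a zoom-10 spatial layer, burning 1.0 into every
# intersected cell. Assumes an active SparkContext:
#
#     from shapely.geometry import box
#
#     layer = rasterize([box(0.0, 0.0, 1.0, 1.0)], crs=4326, zoom=10, fill_value=1.0)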
def __init__(self, uri, layer_name, zoom=None):
    self.layer_name = layer_name
    self.zoom = zoom

    pysc = get_spark_context()
    ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
    self.wrapper = ValueReaderWrapper(uri)
def from_dataframe(dataframe, target_extent=None):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be
    read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.
        target_extent (:class:`~geopyspark.geotrellis.Extent` or
            ``shapely.geometry.Polygon``, optional): The area of interest.
            Only features inside this ``Extent`` will be returned. Defaults to
            ``None``. If ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(
        dataframe._jdf, target_extent)

    return FeaturesCollection(features)
def from_dict(cls, value):
    """Decodes a histogram from a dictionary."""

    pysc = get_spark_context()
    histogram_json = json.dumps(value)

    scala_histogram = pysc._gateway.jvm.geopyspark.geotrellis.Json.readHistogram(
        histogram_json)

    return cls(scala_histogram)
def test_create_params():
    pysc = gps.get_spark_context()
    gateway = JavaGateway(eager_load=True,
                          gateway_parameters=pysc._gateway.gateway_parameters)
    jvm = gateway.jvm

    datacubeParams = jvm.org.openeo.geotrelliscommon.DataCubeParameters()
    datacubeParams.tileSize = 256

    assert datacubeParams.tileSize == 256
def euclidean_distance(geometry, source_crs, zoom, cell_type=CellType.FLOAT64):
    """Calculates the Euclidean distance of a Shapely geometry.

    Args:
        geometry (shapely.geometry): The input geometry to compute the
            Euclidean distance for.
        source_crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`,
            optional): The data type of the cells for the new layer. If not
            specified, then ``CellType.FLOAT64`` is used.

    Note:
        This function may run very slowly for polygonal inputs if they cover
        many cells of the output raster.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(source_crs, int):
        source_crs = str(source_crs)

    pysc = get_spark_context()

    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.euclideanDistance(
        pysc._jsc.sc(), shapely.wkb.dumps(geometry), source_crs,
        CellType(cell_type).value, zoom)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
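# Usage sketch (illustrative, not from the original source): compute per-cell
# distance to a point at the origin in EPSG:4326 at zoom 7; assumes an active
# SparkContext:
#
#     from shapely.geometry import Point
#
#     distance_layer = euclidean_distance(Point(0.0, 0.0), source_crs=4326, zoom=7)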
def __init__(self, server):
    self.pysc = get_spark_context()
    self.server = server
    self.bound = False

    self._host = None
    self._port = None

    self.pysc._gateway.start_callback_server()
def get_layer_ids(uri, options=None, **kwargs):
    """Returns a list of all of the layer ids in the selected catalog as dicts
    that contain the name and zoom of a given layer.

    Args:
        uri (str): The Uniform Resource Identifier used to point towards the
            desired GeoTrellis catalog to be read from. The shape of this
            string varies depending on backend.
        options (dict, optional): Additional parameters for reading the layer
            for specific backends. The dictionary is only used for
            ``Cassandra`` and ``HBase``; no other backend requires this to be
            set.
        **kwargs: The optional parameters can also be set as keyword
            arguments. The keywords must be in camel case. If both options and
            keywords are set, then the options will be used.

    Returns:
        [layerIds]

        Where ``layerIds`` is a ``dict`` with the following fields:
            - **name** (str): The name of the layer.
            - **zoom** (int): The zoom level of the given layer.
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    return list(cached.reader.layerIds())
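# Usage sketch (illustrative; the catalog URI is made up):
#
#     layer_ids = get_layer_ids("file:///tmp/catalog")
#     # e.g. [{'name': 'my-layer', 'zoom': 11}, {'name': 'my-layer', 'zoom': 10}]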
def from_histogram(cls, histogram, color_list,
                   no_data_color=0x00000000, fallback=0x00000000,
                   classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts a wrapped GeoTrellis histogram into a ``ColorMap``.

    Args:
        histogram (:class:`~geopyspark.geotrellis.Histogram`): A ``Histogram``
            instance; specifies breaks.
        color_list ([int]): The colors corresponding to the values in the
            breaks list, represented as integers e.g., 0xff000080 is red at
            half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()
    fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromHistogram
    strat = ClassificationStrategy(classification_strategy).value

    return cls(fn(histogram.scala_histogram, color_list, no_data_color, fallback, strat))
def __init__(self, layout, crs=None, extent=None, cellsize=None, dimensions=None):
    self.__jvm = gps.get_spark_context()._gateway.jvm

    if isinstance(layout, gps.LocalLayout):
        if not extent:
            raise ValueError("Must specify an extent when using LocalLayout")

        if dimensions and not cellsize:
            cellsize = ((extent.xmax - extent.xmin) / dimensions[0],
                        (extent.ymax - extent.ymin) / dimensions[1])
            dimensions = None

        if cellsize and not dimensions:
            tilewidth = layout.tile_cols * cellsize[0]
            tileheight = layout.tile_rows * cellsize[1]
            # Tile columns span the x-axis and tile rows span the y-axis.
            cols = ceil((extent.xmax - extent.xmin) / tilewidth)
            rows = ceil((extent.ymax - extent.ymin) / tileheight)
            extent = gps.Extent(extent.xmin,
                                extent.ymax - rows * tileheight,
                                extent.xmin + cols * tilewidth,
                                extent.ymax)
            tl = gps.TileLayout(cols, rows, layout.tile_cols, layout.tile_rows)
        else:
            raise ValueError("For LocalLayout, must specify exactly one: cellsize or dimensions")

    elif isinstance(layout, gps.GlobalLayout):
        try:
            from pyproj import Proj, transform
        except ImportError:
            raise ImportError('pyproj is required for GlobalLayout')

        if not layout.zoom:
            raise ValueError("Must specify a zoom level when using GlobalLayout")

        if not crs:
            raise ValueError("Must specify a crs when using GlobalLayout")

        if isinstance(crs, int):
            crs = "{}".format(crs)

        gtcrs = self.__jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

        if gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 3857:
            extent = WEB_MERCATOR
        elif gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 4326:
            extent = LATLNG
        else:
            # Project the LatLng world extent into the target CRS.
            llex = LATLNG
            proj4str = gtcrs.toProj4String()
            target = Proj(proj4str)
            xmin, ymin = target(llex.xmin, llex.ymin)
            xmax, ymax = target(llex.xmax, llex.ymax)
            extent = gps.Extent(xmin, ymin, xmax, ymax)

        layout_rows_cols = int(pow(2, layout.zoom))
        tl = gps.TileLayout(layout_rows_cols, layout_rows_cols,
                            layout.tile_size, layout.tile_size)

    elif isinstance(layout, gps.LayoutDefinition):
        extent = layout.extent
        tl = layout.tileLayout

    ex = self.__jvm.geotrellis.vector.Extent(float(extent.xmin), float(extent.ymin),
                                             float(extent.xmax), float(extent.ymax))
    tilelayout = self.__jvm.geotrellis.raster.TileLayout(int(tl[0]), int(tl[1]),
                                                         int(tl[2]), int(tl[3]))

    self.layout = gps.LayoutDefinition(extent, tl)
    self.__layout = self.__jvm.geotrellis.spark.tiling.LayoutDefinition(ex, tilelayout)
def combine_bands(layers):
    """Combines the bands of values that share the same key in two or more
    ``TiledRasterLayer``\s.

    This method will concatenate the bands of two or more values with the same
    key. For example, ``layer a`` has values that have 2 bands and ``layer b``
    has values with 1 band. When ``combine_bands`` is used on both of these
    layers, then the resulting layer will have values with 3 bands, 2 from
    ``layer a`` and 1 from ``layer b``.

    Note:
        All layers must have the same ``layer_type``. If the layers are
        ``TiledRasterLayer``\s, then all of the layers must also have the same
        :class:`~geopyspark.geotrellis.TileLayout` and ``CRS``.

    Args:
        layers ([:class:`~geopyspark.RasterLayer`] or
            [:class:`~geopyspark.TiledRasterLayer`] or
            (:class:`~geopyspark.RasterLayer`) or
            (:class:`~geopyspark.TiledRasterLayer`)): A collection of two or
            more ``RasterLayer``\s or ``TiledRasterLayer``\s. **The order of
            the layers determines the order in which the bands are
            concatenated**, with the bands being ordered based on the position
            of their respective layer. For example, the first layer in
            ``layers`` is ``layer a``, which contains 2 bands, and the second
            layer is ``layer b``, whose values have 1 band. The resulting
            layer will have values with 3 bands: the first 2 are from
            ``layer a`` and the third from ``layer b``. If the positions of
            ``layer a`` and ``layer b`` are reversed, then the resulting
            values' first band will be from ``layer b`` and the last 2 will be
            from ``layer a``.

    Returns:
        :class:`~geopyspark.RasterLayer` or :class:`~geopyspark.TiledRasterLayer`
    """

    if len(layers) == 1:
        raise ValueError("combine_bands can only be performed on 2 or more layers")

    base_layer = layers[0]
    base_layer_type = base_layer.layer_type

    check_layers(base_layer, base_layer_type, layers)

    pysc = get_spark_context()

    if isinstance(base_layer, RasterLayer):
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.ProjectedRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])

        return RasterLayer(base_layer_type, result)

    else:
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])

        return TiledRasterLayer(base_layer_type, result)
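# Usage sketch (illustrative): `layer_a` and `layer_b` are assumed to be
# TiledRasterLayers with identical layouts and CRSs; the band order of the
# result follows the order of the input list:
#
#     combined = combine_bands([layer_a, layer_b])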
def build(cls, breaks, colors=None,
          no_data_color=0x00000000, fallback=0x00000000,
          classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Given breaks and colors, build a ``ColorMap`` object.

    Args:
        breaks (dict or list or :class:`~geopyspark.geotrellis.Histogram`):
            If a ``dict`` then a mapping from tile values to colors, the
            latter represented as integers e.g., 0xff000080 is red at half
            opacity. If a ``list`` then tile values that specify breaks in the
            color mapping. If a ``Histogram`` then a histogram from which
            breaks can be derived.
        colors (str or list, optional): If a ``str`` then the name of a
            matplotlib color ramp. If a ``list`` then either a list of
            colortools ``Color`` objects or a list of integers containing
            packed RGBA values. If ``None``, then the ``ColorMap`` will be
            created from the ``breaks`` given.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if isinstance(breaks, dict):
        return ColorMap.from_break_map(breaks, no_data_color, fallback,
                                       classification_strategy)

    if isinstance(colors, str):
        color_list = get_colors_from_matplotlib(colors)
    elif isinstance(colors, list):
        if all(isinstance(c, int) for c in colors):
            color_list = colors
        else:
            color_list = get_colors_from_colors(colors)
    else:
        raise ValueError("Could not construct ColorMap from the given colors", colors)

    if isinstance(breaks, list):
        return ColorMap.from_colors(breaks, color_list, no_data_color, fallback,
                                    classification_strategy)
    elif isinstance(breaks, Histogram):
        return ColorMap.from_histogram(breaks, color_list, no_data_color, fallback,
                                       classification_strategy)
    else:
        raise ValueError("Could not construct ColorMap from the given breaks", breaks)
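# Usage sketches (illustrative, not from the original source):
#
#     # from a break map (tile value -> packed RGBA color)
#     cmap = ColorMap.build(breaks={3: 0xff0000ff, 4: 0x00ff00ff})
#
#     # from a list of breaks plus the name of a matplotlib color ramp
#     cmap = ColorMap.build(breaks=[0, 10, 20], colors='viridis')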
def get(uri, extensions=['.shp', '.SHP'], num_partitions=None, s3_client=DEFAULT_S3_CLIENT):
    """Creates an ``RDD[Feature]`` from Shapefile(s) that are located on the
    local file system, ``HDFS``, or ``S3``.

    The ``properties`` of the ``Feature``\s in the ``RDD`` will contain the
    attributes of their respective geometry in a ``dict``. All keys and values
    of each ``dict`` will be ``str``\s regardless of how the attribute is
    represented in the Shapefile.

    Note:
        This feature is currently experimental and will most likely change in
        the coming versions of GPS.

    Note:
        When reading from S3, the desired files **must** be publicly readable.
        Otherwise, you will get 403 errors. Due to the nature of how GPS reads
        Shapefile(s) from S3, the ``mock`` S3 client cannot currently be used.

    Args:
        uri (str or [str]): The path or list of paths to the desired
            Shapefile(s)/directory(ies).
        extensions ([str], optional): A list of the extensions that the
            Shapefile(s) have. These are ``.shp`` and ``.SHP`` by default.
        num_partitions (int, optional): The number of partitions Spark will
            make when the ``RDD`` is created. If ``None``, then the
            ``defaultParallelism`` will be used.
        s3_client (str, optional): Which ``S3Client`` to use when reading
            files from S3. There are currently two options: ``default`` and
            ``mock``. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        ``RDD[:class:`~geopyspark.geotrellis.Feature`]``
    """

    pysc = get_spark_context()

    num_partitions = num_partitions or pysc.defaultParallelism
    shapefile = pysc._gateway.jvm.geopyspark.geotools.shapefile.ShapefileRDD

    if isinstance(uri, (list, tuple)):
        jrdd = shapefile.get(pysc._jsc.sc(), uri, extensions, num_partitions, s3_client)
    else:
        jrdd = shapefile.get(pysc._jsc.sc(), [uri], extensions, num_partitions, s3_client)

    ser = ProtoBufSerializer(feature_decoder, None)

    return create_python_rdd(jrdd, ser)
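# Usage sketch (illustrative; the S3 path is made up and must be publicly
# readable, per the note above):
#
#     features = get('s3://some-bucket/shapefiles/roads.shp')
#     attributes = features.map(lambda feature: feature.properties).first()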
def write_assets(self, directory: str) -> Dict:
    """
    Save generated assets into a directory, return asset metadata.

    :return: STAC assets dictionary: https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#assets
    """
    directory = pathlib.Path(directory).parent
    filename = str(pathlib.Path(directory) / "mlmodel.model")
    self._model.save(gps.get_spark_context(), filename)
    return {filename: {"href": filename}}
def to_dict(self):
    """Encodes the histogram as a dictionary.

    Returns:
        ``dict``
    """

    pysc = get_spark_context()

    histogram_json = pysc._gateway.jvm.geopyspark.geotrellis.Json.writeHistogram(
        self.scala_histogram)

    return json.loads(histogram_json)
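# Round-trip sketch (illustrative): `to_dict` and the `from_dict` classmethod
# above are inverses, so a histogram (here assumed to be an instance of the
# Histogram class these methods belong to) can be serialized and restored:
#
#     hist_dict = histogram.to_dict()
#     restored = Histogram.from_dict(hist_dict)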
def __init__(self, uri, layer_name, zoom=None, store=None):
    if store:
        self.store = AttributeStore.build(store)
    else:
        self.store = AttributeStore.cached(uri)

    self.layer_name = layer_name
    self.zoom = zoom

    pysc = get_spark_context()
    scala_store = self.store.wrapper.attributeStore()
    ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
    self.wrapper = ValueReaderWrapper(scala_store, uri)
def load_disk_data(self, format: str, glob_pattern: str, options: dict,
                   viewing_parameters: dict) -> object:
    if format != 'GTiff':
        raise NotImplementedError("The format is not supported by the backend: " + format)

    date_regex = options['date_regex']

    if glob_pattern.startswith("hdfs:"):
        kerberos()

    from_date = normalize_date(viewing_parameters.get("from", None))
    to_date = normalize_date(viewing_parameters.get("to", None))

    left = viewing_parameters.get("left", None)
    right = viewing_parameters.get("right", None)
    top = viewing_parameters.get("top", None)
    bottom = viewing_parameters.get("bottom", None)
    srs = viewing_parameters.get("srs", None)
    band_indices = viewing_parameters.get("bands")

    sc = gps.get_spark_context()
    gateway = JavaGateway(eager_load=True, gateway_parameters=sc._gateway.gateway_parameters)
    jvm = gateway.jvm

    extent = jvm.geotrellis.vector.Extent(float(left), float(bottom), float(right), float(top)) \
        if left is not None and right is not None and top is not None and bottom is not None else None

    pyramid = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(glob_pattern, date_regex) \
        .pyramid_seq(extent, srs, from_date, to_date)

    temporal_tiled_raster_layer = jvm.geopyspark.geotrellis.TemporalTiledRasterLayer
    option = jvm.scala.Option

    levels = {
        pyramid.apply(index)._1(): TiledRasterLayer(
            LayerType.SPACETIME,
            temporal_tiled_raster_layer(option.apply(pyramid.apply(index)._1()),
                                        pyramid.apply(index)._2()))
        for index in range(0, pyramid.size())
    }

    image_collection = GeotrellisTimeSeriesImageCollection(
        pyramid=gps.Pyramid(levels),
        service_registry=self._service_registry,
        metadata={})

    return image_collection.band_filter(band_indices) if band_indices else image_collection
def from_orc(source, target_extent=None):
    """Reads in OSM data from an orc file that is located either locally or on
    S3. The resulting data will be read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the orc file to be read. Can either
            be a local file, or a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on
                the environment and how the file is being read. The following
                describes the parameters that need to be set depending on how
                the files are to be read in. However, **if reading a file on
                EMR, then the access key and secret key do not need to be
                set**.

                If using ``s3a://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3a.impl``
                - ``spark.hadoop.fs.s3a.access.key``
                - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3n.access.key``
                - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to
                ``SparkConf`` would be to export them as environment
                variables:

                - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``

        target_extent (:class:`~geopyspark.geotrellis.Extent` or
            ``shapely.geometry.Polygon``, optional): The area of interest.
            Only features inside this ``Extent`` will be returned. Defaults to
            ``None``. If ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()
    session = SparkSession.builder.config(conf=pysc.getConf()).enableHiveSupport().getOrCreate()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source, target_extent)

    return FeaturesCollection(features)
def from_dataframe(dataframe):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be
    read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(dataframe._jdf)

    return FeaturesCollection(features)
def crs_to_proj4(crs):
    """Converts a given CRS to a Proj4 string.

    Args:
        crs (str or int): The CRS to convert. Can be an EPSG code, a
            well-known name, or a PROJ.4 string.

    Returns:
        str
    """

    if not isinstance(crs, str):
        crs = str(crs)

    pysc = get_spark_context()
    scala_crs = pysc._gateway.jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

    return scala_crs.toProj4String()
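# Usage sketch (the exact PROJ.4 string returned depends on the GeoTrellis
# version backing the gateway):
#
#     crs_to_proj4(4326)
#     # e.g. '+proj=longlat +datum=WGS84 +no_defs '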
def set_s3_credentials(credentials, uri_type):
    """Temporarily updates the session's Amazon S3 credentials for the
    duration of the context.

    Args:
        credentials (Credentials): The access and secret keys used to access
            Amazon S3 resources.
        uri_type (str): The URI type. 's3', 's3a', or 's3n'.
    """

    if credentials:
        if uri_type not in _S3_URI_PREFIXES:
            raise RuntimeError(
                'Cannot set S3 credentials for unrecognized URI type {}'.format(uri_type))

        configuration = get_spark_context()._conf
        with _set_s3_credentials(credentials, configuration, uri_type):
            yield
    else:
        yield
def from_orc(source):
    """Reads in OSM data from an orc file that is located either locally or on
    S3. The resulting data will be read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the orc file to be read. Can either
            be a local file, or a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on
                the environment and how the file is being read. The following
                describes the parameters that need to be set depending on how
                the files are to be read in. However, **if reading a file on
                EMR, then the access key and secret key do not need to be
                set**.

                If using ``s3a://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3a.impl``
                - ``spark.hadoop.fs.s3a.access.key``
                - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3n.access.key``
                - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to
                ``SparkConf`` would be to export them as environment
                variables:

                - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()
    session = SparkSession.builder.config(conf=pysc.getConf()).enableHiveSupport().getOrCreate()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source)

    return FeaturesCollection(features)
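# Usage sketch (illustrative; the path is made up). Reading a local orc file
# requires none of the S3 configuration described above:
#
#     features = from_orc('/tmp/andorra.orc')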
def zfactor_lat_lng_calculator(unit):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    The resulting ``ZFactorCalculator`` produced using this method assumes
    that the ``Tile``\s it will be deriving ``zfactor``\s from are in
    ``LatLng`` (aka ``epsg:4326``). This calculator can still be used on
    ``Tile``\s with different projections; however, the resulting ``Slope``
    calculations may be off.

    Args:
        unit (str or :class:`~geopyspark.geotrellis.constants.Unit`): The unit
            of elevation in the target layer.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()

    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
        ZFactorCalculator.createLatLngZFactorCalculator(Unit(unit).value)

    return calculator
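# Usage sketch (illustrative): build a calculator for a LatLng layer whose
# elevation values are in meters, then use it for a slope calculation on an
# assumed `tiled_layer`; `Unit.METERS` is assumed to be a member of the Unit
# constants referenced in the docstring above:
#
#     calc = zfactor_lat_lng_calculator(Unit.METERS)
#     slope_layer = tiled_layer.slope(calc)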
def get(data_source, xcols=DEFAULT_MAX_TILE_SIZE, ycols=DEFAULT_MAX_TILE_SIZE,
        bands=None, crs_to_proj4=crs_to_proj4):
    """Creates an ``RDD`` of windows represented as the key value pair:
    ``(ProjectedExtent, Tile)`` from URIs using rasterio.

    Args:
        data_source (str or [str] or RDD): The source of the data to be
            windowed. Can either be a URI or list of URIs which point to where
            the source data can be found, or an ``RDD`` that contains the
            URIs.
        xcols (int, optional): The desired tile width. If the size is smaller
            than the width of the read-in tile, then that tile will be broken
            into smaller sections of the given size. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        ycols (int, optional): The desired tile height. If the size is smaller
            than the height of the read-in tile, then that tile will be broken
            into smaller sections of the given size. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        bands ([int], optional): The bands from which windows should be
            produced, given as a list of ``int``\s. Defaults to ``None``,
            which causes all bands to be read.
        crs_to_proj4 (``rasterio.crs.CRS`` => str, optional): A function that
            takes a :class:`rasterio.crs.CRS` and returns a Proj4 string.
            Default is :func:`geopyspark.geotrellis.rasterio.crs_to_proj4`.

    Returns:
        RDD
    """

    pysc = gps.get_spark_context()

    if isinstance(data_source, (list, str)):
        if isinstance(data_source, str):
            data_source = [data_source]

        return pysc.\
            parallelize(data_source, len(data_source)).\
            flatMap(lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
    else:
        return data_source.flatMap(
            lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
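# Usage sketch (illustrative; the path is made up): window a GeoTiff into
# 512x512 sections and inspect the first (ProjectedExtent, Tile) pair:
#
#     windows = get('/tmp/scene.tif', xcols=512, ycols=512)
#     projected_extent, tile = windows.first()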
def from_colors(cls, breaks, color_list,
                no_data_color=0x00000000, fallback=0x00000000,
                classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts lists of values and colors to a ``ColorMap``.

    Args:
        breaks (list): The tile values that specify breaks in the color
            mapping.
        color_list ([int]): The colors corresponding to the values in the
            breaks list, represented as integers---e.g., 0xff000080 is red at
            half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if all(isinstance(x, int) for x in breaks):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaks
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(breaks, color_list, no_data_color, fallback, strat))
    else:
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaksDouble
        arr = [float(br) for br in breaks]
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(arr, color_list, no_data_color, fallback, strat))
def kerberos():
    import geopyspark as gps

    if 'HADOOP_CONF_DIR' not in os.environ:
        logger.warning('HADOOP_CONF_DIR is not set. Kerberos based authentication will '
                       'probably not be set up correctly.')

    sc = gps.get_spark_context()
    gateway = JavaGateway(gateway_parameters=sc._gateway.gateway_parameters)
    jvm = gateway.jvm

    hadoop_auth = jvm.org.apache.hadoop.conf.Configuration().get('hadoop.security.authentication')
    if hadoop_auth != 'kerberos':
        logger.warning('Hadoop client does not have hadoop.security.authentication=kerberos.')

    currentUser = jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser()
    if currentUser.hasKerberosCredentials():
        return

    logger.info("Kerberos currentUser={u!r} isSecurityEnabled={s!r}".format(
        u=currentUser.toString(),
        s=jvm.org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled()))

    principal = sc.getConf().get("spark.yarn.principal")
    sparkKeytab = sc.getConf().get("spark.yarn.keytab")

    if principal is not None and sparkKeytab is not None:
        jvm.org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(
            principal, sparkKeytab)
        jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser().setAuthenticationMethod(
            jvm.org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS)
def read_layer_metadata(layer_type, uri, layer_name, layer_zoom, options=None, **kwargs):
    """Reads the metadata from a saved layer without reading in the whole layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`):
            What the spatial type of the geotiffs are. This is represented by
            either constants within ``LayerType`` or by a string.
        uri (str): The Uniform Resource Identifier used to point towards the
            desired GeoTrellis catalog to be read from. The shape of this
            string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        options (dict, optional): Additional parameters for reading the layer
            for specific backends. The dictionary is only used for
            ``Cassandra`` and ``HBase``; no other backend requires this to be
            set.
        **kwargs: The optional parameters can also be set as keyword
            arguments. The keywords must be in camel case. If both options and
            keywords are set, then the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    if layer_type == LayerType.SPATIAL:
        metadata = cached.store.metadataSpatial(layer_name, layer_zoom)
    else:
        metadata = cached.store.metadataSpaceTime(layer_name, layer_zoom)

    return Metadata.from_dict(json.loads(metadata))
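# Usage sketch (illustrative; catalog URI, layer name, and zoom are made up):
#
#     metadata = read_layer_metadata(LayerType.SPATIAL, "file:///tmp/catalog",
#                                    "my-layer", 11)
#     print(metadata.extent)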
def from_break_map(cls, break_map,
                   no_data_color=0x00000000, fallback=0x00000000,
                   classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts a dictionary mapping from tile values to colors to a ``ColorMap``.

    Args:
        break_map (dict): A mapping from tile values to colors, the latter
            represented as integers e.g., 0xff000080 is red at half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if all(isinstance(x, int) for x in break_map.keys()):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMap
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(break_map, no_data_color, fallback, strat))
    elif all(isinstance(x, float) for x in break_map.keys()):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMapDouble
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(break_map, no_data_color, fallback, strat))
    else:
        raise TypeError("Break map keys must be either int or float.")