def zfactor_calculator(mapped_zfactors):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    Unlike the ``ZFactorCalculator`` produced in
    :meth:`~geopyspark.geotrellis.zfactor_lat_lng_calculator`, the resulting
    ``ZFactorCalculator`` can be used on ``Tile``\s in a different projection.
    However, it cannot be used between different types of projections. For
    example, a ``ZFactorCalculator`` produced for a Layer that is in
    ``WebMercator`` will not create an accurate ``ZFactor`` for a Layer that
    is in ``LatLng``.

    Args:
        mapped_zfactors (dict): A ``dict`` that maps latitudes to
            ``ZFactor``\s. It is not required to supply a mapping for every
            latitude intersected in the layer. Rather, based on the latitudes
            given, a linear interpolation will be performed and any latitude
            not mapped will have its ``ZFactor`` derived from that
            interpolation.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()
    string_map = {str(k): str(v) for k, v in mapped_zfactors.items()}

    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
        ZFactorCalculator.createZFactorCalculator(json.dumps(string_map))

    return calculator
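# Usage sketch (illustrative, not from the original source). Assumes an active
# SparkContext and that `tiled_layer` is a TiledRasterLayer supporting a
# slope calculation that accepts a calculator; the latitude-to-zfactor mapping
# below is made up:
#
#     calc = zfactor_calculator({0.0: 0.00000898, 30.0: 0.00001036, 60.0: 0.00001792})
#     slope_layer = tiled_layer.slope(calc)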
def __init__(self, uri):
    self.uri = uri
    pysc = get_spark_context()

    try:
        self.wrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.AttributeStoreWrapper(uri)
    except Py4JJavaError as err:
        raise ValueError(err.java_exception.getMessage())
def rasterize(geoms, crs, zoom, fill_value, cell_type=CellType.FLOAT64,
              options=None, num_partitions=None):
    """Rasterizes Shapely geometries.

    Args:
        geoms ([shapely.geometry]): List of Shapely geometries to rasterize.
        crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        fill_value (int or float): Value to burn into pixels intersecting the
            geometry.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`):
            Which data type the cells should be when created. Defaults to
            ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`): Pixel
            intersection options.
        num_partitions (int, optional): The number of partitions of the
            resulting layer.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(crs, int):
        crs = str(crs)

    pysc = get_spark_context()
    wkb_geoms = [shapely.wkb.dumps(g) for g in geoms]

    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeGeometry(
        pysc._jsc.sc(), wkb_geoms, crs, zoom, float(fill_value),
        CellType(cell_type).value, options, num_partitions)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
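# Usage sketch (illustrative, not from the original source): rasterize a unit
# square in EPSG:4326 into a zoom-10 spatial layer, burning 1.0 into every
# intersected cell. Assumes an active SparkContext:
#
#     from shapely.geometry import box
#
#     layer = rasterize([box(0.0, 0.0, 1.0, 1.0)], crs=4326, zoom=10, fill_value=1.0)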
def __init__(self, uri, layer_name, zoom=None):
    self.layer_name = layer_name
    self.zoom = zoom

    pysc = get_spark_context()
    ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
    self.wrapper = ValueReaderWrapper(uri)
def from_dataframe(dataframe, target_extent=None):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be
    read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.
        target_extent (:class:`~geopyspark.geotrellis.Extent` or
            ``shapely.geometry.Polygon``, optional): The area of interest.
            Only features inside this ``Extent`` will be returned. Defaults to
            ``None``. If ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(
        dataframe._jdf, target_extent)

    return FeaturesCollection(features)
def from_dict(cls, value):
    """Decodes a histogram from a dictionary."""

    pysc = get_spark_context()
    histogram_json = json.dumps(value)

    scala_histogram = pysc._gateway.jvm.geopyspark.geotrellis.Json.readHistogram(
        histogram_json)

    return cls(scala_histogram)
def test_create_params():
    pysc = gps.get_spark_context()
    gateway = JavaGateway(eager_load=True,
                          gateway_parameters=pysc._gateway.gateway_parameters)
    jvm = gateway.jvm

    datacubeParams = jvm.org.openeo.geotrelliscommon.DataCubeParameters()
    datacubeParams.tileSize = 256

    assert datacubeParams.tileSize == 256
def euclidean_distance(geometry, source_crs, zoom, cell_type=CellType.FLOAT64):
    """Calculates the Euclidean distance of a Shapely geometry.

    Args:
        geometry (shapely.geometry): The input geometry to compute the
            Euclidean distance for.
        source_crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`,
            optional): The data type of the cells for the new layer. If not
            specified, then ``CellType.FLOAT64`` is used.

    Note:
        This function may run very slowly for polygonal inputs if they cover
        many cells of the output raster.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    if isinstance(source_crs, int):
        source_crs = str(source_crs)

    pysc = get_spark_context()

    srdd = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.euclideanDistance(
        pysc._jsc.sc(), shapely.wkb.dumps(geometry), source_crs,
        CellType(cell_type).value, zoom)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
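# Usage sketch (illustrative, not from the original source): compute per-cell
# distance to a point at the origin in EPSG:4326 at zoom 7; assumes an active
# SparkContext:
#
#     from shapely.geometry import Point
#
#     distance_layer = euclidean_distance(Point(0.0, 0.0), source_crs=4326, zoom=7)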
def __init__(self, server):
    self.pysc = get_spark_context()
    self.server = server
    self.bound = False

    self._host = None
    self._port = None

    self.pysc._gateway.start_callback_server()
def get_layer_ids(uri, options=None, **kwargs):
    """Returns a list of all of the layer ids in the selected catalog as dicts
    that contain the name and zoom of a given layer.

    Args:
        uri (str): The Uniform Resource Identifier used to point towards the
            desired GeoTrellis catalog to be read from. The shape of this
            string varies depending on backend.
        options (dict, optional): Additional parameters for reading the layer
            for specific backends. The dictionary is only used for
            ``Cassandra`` and ``HBase``; no other backend requires this to be
            set.
        **kwargs: The optional parameters can also be set as keyword
            arguments. The keywords must be in camel case. If both options and
            keywords are set, then the options will be used.

    Returns:
        [layerIds]

        Where ``layerIds`` is a ``dict`` with the following fields:
            - **name** (str): The name of the layer.
            - **zoom** (int): The zoom level of the given layer.
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    return list(cached.reader.layerIds())
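# Usage sketch (illustrative; the catalog URI is made up):
#
#     layer_ids = get_layer_ids("file:///tmp/catalog")
#     # e.g. [{'name': 'my-layer', 'zoom': 11}, {'name': 'my-layer', 'zoom': 10}]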
def from_histogram(cls, histogram, color_list,
                   no_data_color=0x00000000, fallback=0x00000000,
                   classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts a wrapped GeoTrellis histogram into a ``ColorMap``.

    Args:
        histogram (:class:`~geopyspark.geotrellis.Histogram`): A ``Histogram``
            instance; specifies breaks.
        color_list ([int]): The colors corresponding to the values in the
            breaks list, represented as integers e.g., 0xff000080 is red at
            half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()
    fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromHistogram
    strat = ClassificationStrategy(classification_strategy).value

    return cls(fn(histogram.scala_histogram, color_list, no_data_color, fallback, strat))
def __init__(self, layout, crs=None, extent=None, cellsize=None, dimensions=None):
    self.__jvm = gps.get_spark_context()._gateway.jvm

    if isinstance(layout, gps.LocalLayout):
        if not extent:
            raise ValueError("Must specify an extent when using LocalLayout")

        if dimensions and not cellsize:
            cellsize = ((extent.xmax - extent.xmin) / dimensions[0],
                        (extent.ymax - extent.ymin) / dimensions[1])
            dimensions = None

        if cellsize and not dimensions:
            tilewidth = layout.tile_cols * cellsize[0]
            tileheight = layout.tile_rows * cellsize[1]
            # Tile columns span the x-axis and tile rows span the y-axis.
            cols = ceil((extent.xmax - extent.xmin) / tilewidth)
            rows = ceil((extent.ymax - extent.ymin) / tileheight)
            extent = gps.Extent(extent.xmin,
                                extent.ymax - rows * tileheight,
                                extent.xmin + cols * tilewidth,
                                extent.ymax)
            tl = gps.TileLayout(cols, rows, layout.tile_cols, layout.tile_rows)
        else:
            raise ValueError("For LocalLayout, must specify exactly one: cellsize or dimensions")

    elif isinstance(layout, gps.GlobalLayout):
        try:
            from pyproj import Proj, transform
        except ImportError:
            raise ImportError('pyproj is required for GlobalLayout')

        if not layout.zoom:
            raise ValueError("Must specify a zoom level when using GlobalLayout")

        if not crs:
            raise ValueError("Must specify a crs when using GlobalLayout")

        if isinstance(crs, int):
            crs = "{}".format(crs)

        gtcrs = self.__jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

        if gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 3857:
            extent = WEB_MERCATOR
        elif gtcrs.epsgCode().isDefined() and gtcrs.epsgCode().get() == 4326:
            extent = LATLNG
        else:
            # Project the LatLng world extent into the target CRS.
            llex = LATLNG
            proj4str = gtcrs.toProj4String()
            target = Proj(proj4str)
            xmin, ymin = target(llex.xmin, llex.ymin)
            xmax, ymax = target(llex.xmax, llex.ymax)
            extent = gps.Extent(xmin, ymin, xmax, ymax)

        layout_rows_cols = int(pow(2, layout.zoom))
        tl = gps.TileLayout(layout_rows_cols, layout_rows_cols,
                            layout.tile_size, layout.tile_size)

    elif isinstance(layout, gps.LayoutDefinition):
        extent = layout.extent
        tl = layout.tileLayout

    ex = self.__jvm.geotrellis.vector.Extent(float(extent.xmin), float(extent.ymin),
                                             float(extent.xmax), float(extent.ymax))
    tilelayout = self.__jvm.geotrellis.raster.TileLayout(int(tl[0]), int(tl[1]),
                                                         int(tl[2]), int(tl[3]))

    self.layout = gps.LayoutDefinition(extent, tl)
    self.__layout = self.__jvm.geotrellis.spark.tiling.LayoutDefinition(ex, tilelayout)
def combine_bands(layers):
    """Combines the bands of values that share the same key in two or more
    ``TiledRasterLayer``\s.

    This method will concatenate the bands of two or more values with the same
    key. For example, ``layer a`` has values that have 2 bands and ``layer b``
    has values with 1 band. When ``combine_bands`` is used on both of these
    layers, then the resulting layer will have values with 3 bands, 2 from
    ``layer a`` and 1 from ``layer b``.

    Note:
        All layers must have the same ``layer_type``. If the layers are
        ``TiledRasterLayer``\s, then all of the layers must also have the same
        :class:`~geopyspark.geotrellis.TileLayout` and ``CRS``.

    Args:
        layers ([:class:`~geopyspark.RasterLayer`] or
            [:class:`~geopyspark.TiledRasterLayer`] or
            (:class:`~geopyspark.RasterLayer`) or
            (:class:`~geopyspark.TiledRasterLayer`)): A collection of two or
            more ``RasterLayer``\s or ``TiledRasterLayer``\s. **The order of
            the layers determines the order in which the bands are
            concatenated**, with the bands being ordered based on the position
            of their respective layer. For example, the first layer in
            ``layers`` is ``layer a``, which contains 2 bands, and the second
            layer is ``layer b``, whose values have 1 band. The resulting
            layer will have values with 3 bands: the first 2 are from
            ``layer a`` and the third from ``layer b``. If the positions of
            ``layer a`` and ``layer b`` are reversed, then the resulting
            values' first band will be from ``layer b`` and the last 2 will be
            from ``layer a``.

    Returns:
        :class:`~geopyspark.RasterLayer` or :class:`~geopyspark.TiledRasterLayer`
    """

    if len(layers) == 1:
        raise ValueError("combine_bands can only be performed on 2 or more layers")

    base_layer = layers[0]
    base_layer_type = base_layer.layer_type

    check_layers(base_layer, base_layer_type, layers)

    pysc = get_spark_context()

    if isinstance(base_layer, RasterLayer):
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.ProjectedRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])

        return RasterLayer(base_layer_type, result)

    else:
        if base_layer_type == LayerType.SPATIAL:
            result = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])
        else:
            result = pysc._gateway.jvm.geopyspark.geotrellis.TemporalTiledRasterLayer.combineBands(
                pysc._jsc.sc(), [x.srdd for x in layers])

        return TiledRasterLayer(base_layer_type, result)
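# Usage sketch (illustrative): `layer_a` and `layer_b` are assumed to be
# TiledRasterLayers with identical layouts and CRSs; the band order of the
# result follows the order of the input list:
#
#     combined = combine_bands([layer_a, layer_b])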
def build(cls, breaks, colors=None,
          no_data_color=0x00000000, fallback=0x00000000,
          classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Given breaks and colors, build a ``ColorMap`` object.

    Args:
        breaks (dict or list or :class:`~geopyspark.geotrellis.Histogram`):
            If a ``dict`` then a mapping from tile values to colors, the
            latter represented as integers e.g., 0xff000080 is red at half
            opacity. If a ``list`` then tile values that specify breaks in the
            color mapping. If a ``Histogram`` then a histogram from which
            breaks can be derived.
        colors (str or list, optional): If a ``str`` then the name of a
            matplotlib color ramp. If a ``list`` then either a list of
            colortools ``Color`` objects or a list of integers containing
            packed RGBA values. If ``None``, then the ``ColorMap`` will be
            created from the ``breaks`` given.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if isinstance(breaks, dict):
        return ColorMap.from_break_map(breaks, no_data_color, fallback,
                                       classification_strategy)

    if isinstance(colors, str):
        color_list = get_colors_from_matplotlib(colors)
    elif isinstance(colors, list):
        if all(isinstance(c, int) for c in colors):
            color_list = colors
        else:
            color_list = get_colors_from_colors(colors)
    else:
        raise ValueError("Could not construct ColorMap from the given colors", colors)

    if isinstance(breaks, list):
        return ColorMap.from_colors(breaks, color_list, no_data_color, fallback,
                                    classification_strategy)
    elif isinstance(breaks, Histogram):
        return ColorMap.from_histogram(breaks, color_list, no_data_color, fallback,
                                       classification_strategy)
    else:
        raise ValueError("Could not construct ColorMap from the given breaks", breaks)
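# Usage sketches (illustrative, not from the original source):
#
#     # from a break map (tile value -> packed RGBA color)
#     cmap = ColorMap.build(breaks={3: 0xff0000ff, 4: 0x00ff00ff})
#
#     # from a list of breaks plus the name of a matplotlib color ramp
#     cmap = ColorMap.build(breaks=[0, 10, 20], colors='viridis')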
def get(uri, extensions=['.shp', '.SHP'], num_partitions=None, s3_client=DEFAULT_S3_CLIENT):
    """Creates an ``RDD[Feature]`` from Shapefile(s) that are located on the
    local file system, ``HDFS``, or ``S3``.

    The ``properties`` of the ``Feature``\s in the ``RDD`` will contain the
    attributes of their respective geometry in a ``dict``. All keys and values
    of each ``dict`` will be ``str``\s regardless of how the attribute is
    represented in the Shapefile.

    Note:
        This feature is currently experimental and will most likely change in
        the coming versions of GPS.

    Note:
        When reading from S3, the desired files **must** be publicly readable.
        Otherwise, you will get 403 errors. Due to the nature of how GPS reads
        Shapefile(s) from S3, the ``mock`` S3 client cannot currently be used.

    Args:
        uri (str or [str]): The path or list of paths to the desired
            Shapefile(s)/directory(ies).
        extensions ([str], optional): A list of the extensions that the
            Shapefile(s) have. These are ``.shp`` and ``.SHP`` by default.
        num_partitions (int, optional): The number of partitions Spark will
            make when the ``RDD`` is created. If ``None``, then the
            ``defaultParallelism`` will be used.
        s3_client (str, optional): Which ``S3Client`` to use when reading
            files from S3. There are currently two options: ``default`` and
            ``mock``. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        ``RDD[:class:`~geopyspark.geotrellis.Feature`]``
    """

    pysc = get_spark_context()

    num_partitions = num_partitions or pysc.defaultParallelism
    shapefile = pysc._gateway.jvm.geopyspark.geotools.shapefile.ShapefileRDD

    if isinstance(uri, (list, tuple)):
        jrdd = shapefile.get(pysc._jsc.sc(), uri, extensions, num_partitions, s3_client)
    else:
        jrdd = shapefile.get(pysc._jsc.sc(), [uri], extensions, num_partitions, s3_client)

    ser = ProtoBufSerializer(feature_decoder, None)

    return create_python_rdd(jrdd, ser)
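# Usage sketch (illustrative; the S3 path is made up and must be publicly
# readable, per the note above):
#
#     features = get('s3://some-bucket/shapefiles/roads.shp')
#     attributes = features.map(lambda feature: feature.properties).first()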
def write_assets(self, directory: str) -> Dict:
    """
    Save generated assets into a directory, return asset metadata.

    :return: STAC assets dictionary: https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#assets
    """
    directory = pathlib.Path(directory).parent
    filename = str(pathlib.Path(directory) / "mlmodel.model")
    self._model.save(gps.get_spark_context(), filename)
    return {filename: {"href": filename}}
def to_dict(self):
    """Encodes the histogram as a dictionary.

    Returns:
        ``dict``
    """

    pysc = get_spark_context()

    histogram_json = pysc._gateway.jvm.geopyspark.geotrellis.Json.writeHistogram(
        self.scala_histogram)

    return json.loads(histogram_json)
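# Round-trip sketch (illustrative): `to_dict` and the `from_dict` classmethod
# above are inverses, so a histogram (here assumed to be an instance of the
# Histogram class these methods belong to) can be serialized and restored:
#
#     hist_dict = histogram.to_dict()
#     restored = Histogram.from_dict(hist_dict)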
def __init__(self, uri, layer_name, zoom=None, store=None):
    if store:
        self.store = AttributeStore.build(store)
    else:
        self.store = AttributeStore.cached(uri)

    self.layer_name = layer_name
    self.zoom = zoom

    pysc = get_spark_context()
    scala_store = self.store.wrapper.attributeStore()
    ValueReaderWrapper = pysc._gateway.jvm.geopyspark.geotrellis.io.ValueReaderWrapper
    self.wrapper = ValueReaderWrapper(scala_store, uri)
def load_disk_data(self, format: str, glob_pattern: str, options: dict,
                   viewing_parameters: dict) -> object:
    if format != 'GTiff':
        raise NotImplementedError("The format is not supported by the backend: " + format)

    date_regex = options['date_regex']

    if glob_pattern.startswith("hdfs:"):
        kerberos()

    from_date = normalize_date(viewing_parameters.get("from", None))
    to_date = normalize_date(viewing_parameters.get("to", None))

    left = viewing_parameters.get("left", None)
    right = viewing_parameters.get("right", None)
    top = viewing_parameters.get("top", None)
    bottom = viewing_parameters.get("bottom", None)
    srs = viewing_parameters.get("srs", None)
    band_indices = viewing_parameters.get("bands")

    sc = gps.get_spark_context()
    gateway = JavaGateway(eager_load=True, gateway_parameters=sc._gateway.gateway_parameters)
    jvm = gateway.jvm

    extent = jvm.geotrellis.vector.Extent(float(left), float(bottom), float(right), float(top)) \
        if left is not None and right is not None and top is not None and bottom is not None else None

    pyramid = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(glob_pattern, date_regex) \
        .pyramid_seq(extent, srs, from_date, to_date)

    temporal_tiled_raster_layer = jvm.geopyspark.geotrellis.TemporalTiledRasterLayer
    option = jvm.scala.Option

    levels = {
        pyramid.apply(index)._1(): TiledRasterLayer(
            LayerType.SPACETIME,
            temporal_tiled_raster_layer(option.apply(pyramid.apply(index)._1()),
                                        pyramid.apply(index)._2()))
        for index in range(0, pyramid.size())
    }

    image_collection = GeotrellisTimeSeriesImageCollection(
        pyramid=gps.Pyramid(levels),
        service_registry=self._service_registry,
        metadata={})

    return image_collection.band_filter(band_indices) if band_indices else image_collection
def from_orc(source, target_extent=None):
    """Reads in OSM data from an orc file that is located either locally or on
    S3. The resulting data will be read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the orc file to be read. Can either
            be a local file, or a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on
                the environment and how the file is being read. The following
                describes the parameters that need to be set depending on how
                the files are to be read in. However, **if reading a file on
                EMR, then the access key and secret key do not need to be
                set**.

                If using ``s3a://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3a.impl``
                - ``spark.hadoop.fs.s3a.access.key``
                - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3n.access.key``
                - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to
                ``SparkConf`` would be to export them as environment
                variables:

                - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``

        target_extent (:class:`~geopyspark.geotrellis.Extent` or
            ``shapely.geometry.Polygon``, optional): The area of interest.
            Only features inside this ``Extent`` will be returned. Defaults to
            ``None``. If ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()
    session = SparkSession.builder.config(conf=pysc.getConf()).enableHiveSupport().getOrCreate()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source, target_extent)

    return FeaturesCollection(features)
def from_dataframe(dataframe):
    """Reads OSM data from a Spark ``DataFrame``. The resulting data will be
    read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromDataFrame(dataframe._jdf)

    return FeaturesCollection(features)
def crs_to_proj4(crs):
    """Converts a given CRS to a Proj4 string.

    Args:
        crs (str or int): The CRS to convert. Can be an EPSG code, a
            well-known name, or a PROJ.4 string.

    Returns:
        str
    """

    if not isinstance(crs, str):
        crs = str(crs)

    pysc = get_spark_context()
    scala_crs = pysc._gateway.jvm.geopyspark.geotrellis.TileLayer.getCRS(crs).get()

    return scala_crs.toProj4String()
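# Usage sketch (the exact PROJ.4 string returned depends on the GeoTrellis
# version backing the gateway):
#
#     crs_to_proj4(4326)
#     # e.g. '+proj=longlat +datum=WGS84 +no_defs '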
def set_s3_credentials(credentials, uri_type):
    """Temporarily updates the session's Amazon S3 credentials for the
    duration of the context.

    Args:
        credentials (Credentials): The access and secret keys used to access
            Amazon S3 resources.
        uri_type (str): The URI type. 's3', 's3a', or 's3n'.
    """

    if credentials:
        if uri_type not in _S3_URI_PREFIXES:
            raise RuntimeError(
                'Cannot set S3 credentials for unrecognized URI type {}'.format(uri_type))

        configuration = get_spark_context()._conf
        with _set_s3_credentials(credentials, configuration, uri_type):
            yield
    else:
        yield
def from_orc(source):
    """Reads in OSM data from an orc file that is located either locally or on
    S3. The resulting data will be read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the orc file to be read. Can either
            be a local file, or a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on
                the environment and how the file is being read. The following
                describes the parameters that need to be set depending on how
                the files are to be read in. However, **if reading a file on
                EMR, then the access key and secret key do not need to be
                set**.

                If using ``s3a://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3a.impl``
                - ``spark.hadoop.fs.s3a.access.key``
                - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf``
                parameters need to be set:

                - ``spark.hadoop.fs.s3n.access.key``
                - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to
                ``SparkConf`` would be to export them as environment
                variables:

                - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    pysc = get_spark_context()
    session = SparkSession.builder.config(conf=pysc.getConf()).enableHiveSupport().getOrCreate()

    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(
        session._jsparkSession, source)

    return FeaturesCollection(features)
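# Usage sketch (illustrative; the path is made up). Reading a local orc file
# requires none of the S3 configuration described above:
#
#     features = from_orc('/tmp/andorra.orc')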
def zfactor_lat_lng_calculator(unit):
    """Produces the Scala class, ``ZFactorCalculator`` as a ``JavaObject``.

    The resulting ``ZFactorCalculator`` produced using this method assumes
    that the ``Tile``\s it will be deriving ``zfactor``\s from are in
    ``LatLng`` (aka ``epsg:4326``). This calculator can still be used on
    ``Tile``\s with different projections; however, the resulting ``Slope``
    calculations may be off.

    Args:
        unit (str or :class:`~geopyspark.geotrellis.constants.Unit`): The unit
            of elevation in the target layer.

    Returns:
        ``py4j.JavaObject``
    """

    pysc = get_spark_context()

    calculator = pysc._gateway.jvm.geopyspark.geotrellis.\
        ZFactorCalculator.createLatLngZFactorCalculator(Unit(unit).value)

    return calculator
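# Usage sketch (illustrative): build a calculator for a LatLng layer whose
# elevation values are in meters, then use it for a slope calculation on an
# assumed `tiled_layer`; `Unit.METERS` is assumed to be a member of the Unit
# constants referenced in the docstring above:
#
#     calc = zfactor_lat_lng_calculator(Unit.METERS)
#     slope_layer = tiled_layer.slope(calc)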
def get(data_source, xcols=DEFAULT_MAX_TILE_SIZE, ycols=DEFAULT_MAX_TILE_SIZE,
        bands=None, crs_to_proj4=crs_to_proj4):
    """Creates an ``RDD`` of windows represented as the key value pair:
    ``(ProjectedExtent, Tile)`` from URIs using rasterio.

    Args:
        data_source (str or [str] or RDD): The source of the data to be
            windowed. Can either be a URI or list of URIs which point to where
            the source data can be found, or an ``RDD`` that contains the
            URIs.
        xcols (int, optional): The desired tile width. If the size is smaller
            than the width of the read-in tile, then that tile will be broken
            into smaller sections of the given size. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        ycols (int, optional): The desired tile height. If the size is smaller
            than the height of the read-in tile, then that tile will be broken
            into smaller sections of the given size. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
        bands ([int], optional): The bands from which windows should be
            produced, given as a list of ``int``\s. Defaults to ``None``,
            which causes all bands to be read.
        crs_to_proj4 (``rasterio.crs.CRS`` => str, optional): A function that
            takes a :class:`rasterio.crs.CRS` and returns a Proj4 string.
            Default is :func:`geopyspark.geotrellis.rasterio.crs_to_proj4`.

    Returns:
        RDD
    """

    pysc = gps.get_spark_context()

    if isinstance(data_source, (list, str)):
        if isinstance(data_source, str):
            data_source = [data_source]

        return pysc.\
            parallelize(data_source, len(data_source)).\
            flatMap(lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
    else:
        return data_source.flatMap(
            lambda ds: _read_windows(ds, xcols, ycols, bands, crs_to_proj4))
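# Usage sketch (illustrative; the path is made up): window a GeoTiff into
# 512x512 sections and inspect the first (ProjectedExtent, Tile) pair:
#
#     windows = get('/tmp/scene.tif', xcols=512, ycols=512)
#     projected_extent, tile = windows.first()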
def from_colors(cls, breaks, color_list,
                no_data_color=0x00000000, fallback=0x00000000,
                classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts lists of values and colors to a ``ColorMap``.

    Args:
        breaks (list): The tile values that specify breaks in the color
            mapping.
        color_list ([int]): The colors corresponding to the values in the
            breaks list, represented as integers---e.g., 0xff000080 is red at
            half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if all(isinstance(x, int) for x in breaks):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaks
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(breaks, color_list, no_data_color, fallback, strat))
    else:
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromBreaksDouble
        arr = [float(br) for br in breaks]
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(arr, color_list, no_data_color, fallback, strat))
def kerberos():
    import geopyspark as gps

    if 'HADOOP_CONF_DIR' not in os.environ:
        logger.warning('HADOOP_CONF_DIR is not set. Kerberos based authentication will '
                       'probably not be set up correctly.')

    sc = gps.get_spark_context()
    gateway = JavaGateway(gateway_parameters=sc._gateway.gateway_parameters)
    jvm = gateway.jvm

    hadoop_auth = jvm.org.apache.hadoop.conf.Configuration().get('hadoop.security.authentication')
    if hadoop_auth != 'kerberos':
        logger.warning('Hadoop client does not have hadoop.security.authentication=kerberos.')

    currentUser = jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser()
    if currentUser.hasKerberosCredentials():
        return

    logger.info("Kerberos currentUser={u!r} isSecurityEnabled={s!r}".format(
        u=currentUser.toString(),
        s=jvm.org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled()))

    principal = sc.getConf().get("spark.yarn.principal")
    sparkKeytab = sc.getConf().get("spark.yarn.keytab")

    if principal is not None and sparkKeytab is not None:
        jvm.org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(
            principal, sparkKeytab)
        jvm.org.apache.hadoop.security.UserGroupInformation.getCurrentUser().setAuthenticationMethod(
            jvm.org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS)
def read_layer_metadata(layer_type, uri, layer_name, layer_zoom, options=None, **kwargs):
    """Reads the metadata from a saved layer without reading in the whole layer.

    Args:
        layer_type (str or :class:`geopyspark.geotrellis.constants.LayerType`):
            What the spatial type of the geotiffs are. This is represented by
            either constants within ``LayerType`` or by a string.
        uri (str): The Uniform Resource Identifier used to point towards the
            desired GeoTrellis catalog to be read from. The shape of this
            string varies depending on backend.
        layer_name (str): The name of the GeoTrellis catalog to be read from.
        layer_zoom (int): The zoom level of the layer that is to be read.
        options (dict, optional): Additional parameters for reading the layer
            for specific backends. The dictionary is only used for
            ``Cassandra`` and ``HBase``; no other backend requires this to be
            set.
        **kwargs: The optional parameters can also be set as keyword
            arguments. The keywords must be in camel case. If both options and
            keywords are set, then the options will be used.

    Returns:
        :class:`~geopyspark.geotrellis.Metadata`
    """

    options = options or kwargs or {}

    _construct_catalog(get_spark_context(), uri, options)
    cached = _mapped_cached[uri]

    if layer_type == LayerType.SPATIAL:
        metadata = cached.store.metadataSpatial(layer_name, layer_zoom)
    else:
        metadata = cached.store.metadataSpaceTime(layer_name, layer_zoom)

    return Metadata.from_dict(json.loads(metadata))
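# Usage sketch (illustrative; catalog URI, layer name, and zoom are made up):
#
#     metadata = read_layer_metadata(LayerType.SPATIAL, "file:///tmp/catalog",
#                                    "my-layer", 11)
#     print(metadata.extent)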
def from_break_map(cls, break_map,
                   no_data_color=0x00000000, fallback=0x00000000,
                   classification_strategy=ClassificationStrategy.LESS_THAN_OR_EQUAL_TO):
    """Converts a dictionary mapping from tile values to colors to a ``ColorMap``.

    Args:
        break_map (dict): A mapping from tile values to colors, the latter
            represented as integers e.g., 0xff000080 is red at half opacity.
        no_data_color (int, optional): A color to replace NODATA values with.
        fallback (int, optional): A color to replace cells that have no value
            in the mapping.
        classification_strategy (str or
            :class:`~geopyspark.geotrellis.constants.ClassificationStrategy`,
            optional): A string giving the strategy for converting tile values
            to colors. e.g., if ``ClassificationStrategy.LESS_THAN_OR_EQUAL_TO``
            is specified, and the break map is {3: 0xff0000ff, 4: 0x00ff00ff},
            then values up to 3 map to red, values above 3 and up to and
            including 4 become green, and values over 4 become the fallback
            color.

    Returns:
        :class:`~geopyspark.geotrellis.color.ColorMap`
    """

    pysc = get_spark_context()

    if all(isinstance(x, int) for x in break_map.keys()):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMap
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(break_map, no_data_color, fallback, strat))
    elif all(isinstance(x, float) for x in break_map.keys()):
        fn = pysc._gateway.jvm.geopyspark.geotrellis.ColorMapUtils.fromMapDouble
        strat = ClassificationStrategy(classification_strategy).value
        return cls(fn(break_map, no_data_color, fallback, strat))
    else:
        raise TypeError("Break map keys must be either int or float.")