Example #1
0
def euclidean_distance(geometry, source_crs, zoom, cell_type=CellType.FLOAT64):
    """Builds a Euclidean distance raster layer from a Shapely geometry.

    Args:
        geometry (shapely.geometry): The geometry to compute the Euclidean distance for.
            It is serialized to WKB before being passed to the JVM.
        source_crs (str or int): The CRS of ``geometry``. An ``int`` is converted to its
            ``str`` form.
        zoom (int): The zoom level of the output raster.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`, optional): The data
            type of the cells for the new layer. Defaults to ``CellType.FLOAT64``.

    Note:
        This function may run very slowly for polygonal inputs if they cover many cells of
        the output raster.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    crs_string = str(source_crs) if isinstance(source_crs, int) else source_crs

    pysc = get_spark_context()
    scala_layer = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer

    distance_srdd = scala_layer.euclideanDistance(
        pysc._jsc.sc(),
        shapely.wkb.dumps(geometry),
        crs_string,
        CellType(cell_type).value,
        zoom)

    return TiledRasterLayer(LayerType.SPATIAL, distance_srdd)
Example #2
0
def rasterize(geoms,
              crs,
              zoom,
              fill_value,
              cell_type=CellType.FLOAT64,
              options=None,
              num_partitions=None):
    """Rasterizes a list of Shapely geometries into a spatial tiled layer.

    Args:
        geoms ([shapely.geometry]): List of shapely geometries to rasterize. Each one is
            serialized to WKB before being passed to the JVM.
        crs (str or int): The CRS of the input geometry. An ``int`` is converted to its
            ``str`` form.
        zoom (int): The zoom level of the output raster.
        fill_value (int or float): Value to burn into pixels intersecting the geometry. It is
            always passed on as a ``float``.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type the
            cells should be when created. Defaults to ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`): Pixel intersection options.
        num_partitions (int, optional): Forwarded unchanged to the JVM rasterizer; presumably
            the number of partitions of the resulting layer (confirm against the Scala side).

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterLayer`
    """

    crs_string = str(crs) if isinstance(crs, int) else crs

    pysc = get_spark_context()
    serialized_geoms = list(map(shapely.wkb.dumps, geoms))

    scala_layer = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer
    rasterized_srdd = scala_layer.rasterizeGeometry(
        pysc._jsc.sc(),
        serialized_geoms,
        crs_string,
        zoom,
        float(fill_value),
        CellType(cell_type).value,
        options,
        num_partitions)

    return TiledRasterLayer(LayerType.SPATIAL, rasterized_srdd)
Example #3
0
class Metadata(object):
    """Information of the values within a ``RasterLayer`` or ``TiledRasterLayer``.
    This data pertains to the layout and other attributes of the data within the classes.

    Args:
        bounds (:class:`~geopyspark.geotrellis.Bounds`): The ``Bounds`` of the
            values in the class.
        crs (str or int): The ``CRS`` of the data. Can either be the EPSG code, well-known name, or
            a PROJ.4 projection string.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): The data type of the
            cells of the rasters.
        extent (:class:`~geopyspark.geotrellis.Extent`): The ``Extent`` that covers
            all of the rasters.
        layout_definition (:class:`~geopyspark.geotrellis.LayoutDefinition`): The
            ``LayoutDefinition`` of all rasters.

    Attributes:
        bounds (:class:`~geopyspark.geotrellis.Bounds`): The ``Bounds`` of the values in the class.
        crs (str or int): The CRS of the data. Can either be the EPSG code, well-known name, or
            a PROJ.4 projection string.
        cell_type (str): The data type of the cells of the rasters.
        no_data_value (int or float or None): The noData value of the rasters within the layer.
            This can either be ``None``, an ``int``, or a ``float`` depending on the ``cell_type``.
        extent (:class:`~geopyspark.geotrellis.Extent`): The ``Extent`` that covers
            all of the rasters.
        tile_layout (:class:`~geopyspark.geotrellis.TileLayout`): The ``TileLayout``
            that describes how the rasters are organized.
        layout_definition (:class:`~geopyspark.geotrellis.LayoutDefinition`): The
            ``LayoutDefinition`` of all rasters.
    """
    def __init__(self, bounds, crs, cell_type, extent, layout_definition):
        self.bounds = bounds
        self.crs = crs

        # ``cell_type`` is always stored as a plain string, whichever form was given.
        if isinstance(cell_type, CellType):
            self.cell_type = cell_type.value
        else:
            self.cell_type = cell_type

        self.extent = extent
        self.tile_layout = layout_definition.tileLayout
        self.layout_definition = layout_definition

        # Derive the noData value from the cell type string.
        if 'raw' in self.cell_type or 'bool' in self.cell_type:
            self.no_data_value = None
        elif 'ud' in self.cell_type:
            # The text following "ud" in the cell type string is the noData value itself.
            value = self.cell_type.split("ud")[1]

            if "float" in self.cell_type:
                self.no_data_value = float(value)
            else:
                self.no_data_value = int(value)
        elif self.cell_type == CellType.INT8.value:
            self.no_data_value = -128
        elif self.cell_type in (CellType.UINT8.value, CellType.UINT16.value):
            self.no_data_value = 0
        elif self.cell_type == CellType.INT16.value:
            self.no_data_value = -32768
        elif self.cell_type == CellType.INT32.value:
            self.no_data_value = NO_DATA_INT
        else:
            # Every remaining cell type falls back to NaN.
            self.no_data_value = float('nan')

    @classmethod
    def from_dict(cls, metadata_dict):
        """Creates ``Metadata`` from a dictionary.

        The given dictionary is not modified.

        Args:
            metadata_dict (dict): The ``Metadata`` of a ``RasterLayer`` or ``TiledRasterLayer``
                instance that is in ``dict`` form.

        Returns:
            :class:`~geopyspark.geotrellis.Metadata`
        """

        crs = metadata_dict['crs']
        cell_type = metadata_dict['cellType']

        bounds_dict = metadata_dict['bounds']

        # A key with exactly two entries is treated as spatial; anything else as space-time.
        if len(bounds_dict['minKey']) == 2:
            min_key = SpatialKey(**bounds_dict['minKey'])
            max_key = SpatialKey(**bounds_dict['maxKey'])
        else:
            # Work on copies: rewriting 'instant' in the caller's dicts would make a
            # second ``from_dict`` call on the same input fail (datetime / 1000).
            scala_min_key = dict(bounds_dict['minKey'])
            scala_max_key = dict(bounds_dict['maxKey'])

            # 'instant' is divided by 1000 to get the seconds ``utcfromtimestamp`` expects
            # (presumably it arrives as epoch milliseconds).
            scala_min_key['instant'] = datetime.datetime.utcfromtimestamp(
                scala_min_key['instant'] / 1000)
            scala_max_key['instant'] = datetime.datetime.utcfromtimestamp(
                scala_max_key['instant'] / 1000)

            min_key = SpaceTimeKey(**scala_min_key)
            max_key = SpaceTimeKey(**scala_max_key)

        bounds = Bounds(min_key, max_key)
        extent = Extent(**metadata_dict['extent'])

        layout_dict = metadata_dict['layoutDefinition']
        layout_definition = LayoutDefinition(
            Extent(**layout_dict['extent']),
            TileLayout(**layout_dict['tileLayout']))

        return cls(bounds, crs, cell_type, extent, layout_definition)

    def to_dict(self):
        """Converts this instance to a ``dict``.

        Returns:
            ``dict``
        """

        metadata_dict = {
            'bounds': self.bounds._asdict(),
            'crs': self.crs,
            'cellType': self.cell_type,
            'extent': self.extent._asdict(),
            'layoutDefinition': {
                'extent': self.layout_definition.extent._asdict(),
                'tileLayout': self.tile_layout._asdict()
            }
        }

        return metadata_dict

    def __repr__(self):
        return "Metadata({}, {}, {}, {}, {}, {}, {})".format(
            self.bounds, self.cell_type, self.no_data_value, self.crs,
            self.extent, self.tile_layout, self.layout_definition)

    def __str__(self):
        # Each field is followed by ", " so adjacent fields do not run together.
        return ("Metadata("
                "bounds={}, "
                "cellType={}, "
                "noDataValue={}, "
                "crs={}, "
                "extent={}, "
                "tileLayout={}, "
                "layoutDefinition={})").format(self.bounds, self.cell_type,
                                               self.no_data_value, self.crs,
                                               self.extent, self.tile_layout,
                                               self.layout_definition)
Example #4
0
def rasterize_features(features,
                       crs,
                       zoom,
                       cell_type=CellType.FLOAT64,
                       options=None,
                       zindex_cell_type=CellType.INT8,
                       partition_strategy=None):
    r"""Rasterizes a collection of :class:`~geopyspark.vector_pipe.Feature`\s.

    Args:
        features (pyspark.RDD[Feature]): A Python ``RDD`` that
            contains :class:`~geopyspark.vector_pipe.Feature`\s.

            Note:
                The ``properties`` of each ``Feature`` must be an instance of
                :class:`~geopyspark.vector_pipe.CellValue`.
        crs (str or int): The CRS of the input geometry. An ``int`` is converted to its
            ``str`` form.
        zoom (int): The zoom level of the output raster.

            Note:
                Not all rasterized ``Feature``\s may be present in the resulting layer
                if the ``zoom`` is not high enough.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type the
            cells should be when created. Defaults to ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`, optional): Pixel intersection options.
        zindex_cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type
            the ``Z-Index`` cells are. Defaults to ``CellType.INT8``.
        partition_strategy (:class:`~geopyspark.HashPartitionStrategy` or :class:`~geopyspark.SpatialPartitionStrategy`, optional):
            Sets the ``Partitioner`` for the resulting layer and how many partitions it has.
            Default is ``None``.

            If ``None``, then the output layer will have the default ``Partitioner`` and a number
            of partitions that was determined by the method.

            If ``partition_strategy`` is set but has no ``num_partitions``, then the resulting layer
            will have the ``Partitioner`` specified in the strategy with the same number of
            partitions the source layer had.

            If ``partition_strategy`` is set and has a ``num_partitions``, then the resulting layer
            will have the ``Partitioner`` and number of partitions specified in the strategy.

    Returns:
        :class:`~geopyspark.geotrellis.layer.TiledRasterLayer`
    """

    if isinstance(crs, int):
        crs = str(crs)

    pysc = get_spark_context()
    rasterizer = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeFeaturesWithZIndex

    # Re-encode the features with the ProtoBuf serializer before handing the RDD to the JVM.
    ser = ProtoBufSerializer(feature_cellvalue_decoder, feature_cellvalue_encoder)
    reserialized_rdd = features._reserialize(ser)

    srdd = rasterizer(reserialized_rdd._jrdd.rdd(),
                      crs,
                      zoom,
                      CellType(cell_type).value,
                      options,
                      CellType(zindex_cell_type).value,
                      partition_strategy)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
Example #5
0
def rasterize(geoms,
              crs,
              zoom,
              fill_value,
              cell_type=CellType.FLOAT64,
              options=None,
              partition_strategy=None):
    """Rasterizes Shapely geometries into a spatial tiled layer.

    Args:
        geoms ([shapely.geometry] or (shapely.geometry) or pyspark.RDD[shapely.geometry]): Either
            a list, tuple, or a Python RDD of shapely geometries to rasterize. Anything that
            is not a ``list`` or ``tuple`` is treated as an RDD.
        crs (str or int): The CRS of the input geometry. An ``int`` is converted to its
            ``str`` form.
        zoom (int): The zoom level of the output raster.
        fill_value (int or float): Value to burn into pixels intersecting the geometry. It is
            always passed on as a ``float``.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type the
            cells should be when created. Defaults to ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`, optional): Pixel intersection options.
        partition_strategy (:class:`~geopyspark.HashPartitionStrategy` or :class:`~geopyspark.SpatialPartitionStrategy`, optional):
            Sets the ``Partitioner`` for the resulting layer and how many partitions it has.
            Default is ``None``.

            If ``None``, then the output layer will have the default ``Partitioner`` and a number
            of partitions that was determined by the method.

            If ``partition_strategy`` is set but has no ``num_partitions``, then the resulting layer
            will have the ``Partitioner`` specified in the strategy with the same number of
            partitions the source layer had.

            If ``partition_strategy`` is set and has a ``num_partitions``, then the resulting layer
            will have the ``Partitioner`` and number of partitions specified in the strategy.

    Returns:
        :class:`~geopyspark.geotrellis.layer.TiledRasterLayer`
    """

    crs_string = str(crs) if isinstance(crs, int) else crs

    pysc = get_spark_context()
    rasterizer = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeGeometry

    # Only the leading arguments differ between the two input kinds:
    # in-memory geometries need the SparkContext, an RDD is passed directly.
    if isinstance(geoms, (list, tuple)):
        serialized_geoms = [dumps(geometry) for geometry in geoms]
        leading_args = (pysc._jsc.sc(), serialized_geoms)
    else:
        serialized_rdd = geoms.map(lambda geom: dumps(geom))

        # If this is False then the WKBs will be serialized
        # when going to Scala resulting in garbage
        serialized_rdd._bypass_serializer = True

        leading_args = (serialized_rdd._jrdd.rdd(),)

    trailing_args = (crs_string,
                     zoom,
                     float(fill_value),
                     CellType(cell_type).value,
                     options,
                     partition_strategy)

    rasterized_srdd = rasterizer(*(leading_args + trailing_args))

    return TiledRasterLayer(LayerType.SPATIAL, rasterized_srdd)