Beispiel #1
0
    def test_all_zeros(self):
        """Check ``get_min_max`` on a tile whose cells are all zero.

        A ``(1, 16, 16)`` array of zeros is wrapped in a ``RasterRDD`` and the
        reported pair must be ``(0.0, 0.0)``.
        """
        context = BaseTestClass.geopysc
        zero_tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': -500}
        records = [(self.projected_extent, zero_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        self.assertEqual((0.0, 0.0), layer.get_min_max())
Beispiel #2
0
    def test_floating(self):
        """Check ``get_min_max`` on a float tile that uses NaN as no-data.

        The cell values span 0.0 to 2.0, so ``(0.0, 2.0)`` is expected.
        """
        context = BaseTestClass.geopysc
        rows = [[0.0] * 4, [1.0] * 4, [1.5] * 4, [2.0] * 4]
        float_tile = {
            'data': np.array([rows], dtype=float),
            'no_data_value': float('nan'),
        }
        records = [(self.projected_extent, float_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        self.assertEqual((0.0, 2.0), layer.get_min_max())
Beispiel #3
0
    def test_multibands(self):
        """Check ``get_min_max`` on a ``(4, 1, 4)`` integer array.

        The four sub-arrays hold the constants 1, 2, 3 and 4, so the expected
        result is ``(1.0, 4.0)``.
        """
        context = BaseTestClass.geopysc
        cells = np.array([[[value] * 4] for value in (1, 2, 3, 4)], dtype=int)
        int_tile = {'data': cells, 'no_data_value': -500}
        records = [(self.projected_extent, int_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        self.assertEqual((1.0, 4.0), layer.get_min_max())
    def test_collection_python_rdd(self):
        """Collect metadata for a raster read with rasterio and verify it.

        The file at ``self.dir_path`` is read into a tile dict, turned into a
        ``RasterRDD``, and ``collect_metadata`` is called with the fixture's
        ``extent`` and ``layout``. The returned metadata must echo both.
        """
        # Use the dataset as a context manager so the file handle is released
        # even if reading fails; the original left the dataset open.
        with rasterio.open(self.dir_path) as data:
            tile_dict = {'data': data.read(), 'no_data_value': data.nodata}

        rasterio_rdd = self.geopysc.pysc.parallelize([(self.projected_extent,
                                                       tile_dict)])
        raster_rdd = RasterRDD.from_numpy_rdd(self.geopysc, SPATIAL,
                                              rasterio_rdd)

        result = raster_rdd.collect_metadata(extent=self.extent,
                                             layout=self.layout)

        self.assertEqual(result.extent, self.extent)
        self.assertEqual(result.layout_definition.extent, self.extent)
        self.assertEqual(result.layout_definition.tileLayout, self.layout)
Beispiel #5
0
    def test_all_zeros(self):
        """Reclassify an all-zero tile with ``{0: 1}`` and expect only ones."""
        context = BaseTestClass.geopysc
        zero_tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': -500}
        records = [(self.projected_extent, zero_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        reclassified = layer.reclassify({0: 1}, int)
        first_record = reclassified.to_numpy_rdd().first()
        cells = first_record[1]['data']

        self.assertTrue((cells == 1).all())
    def test_to_int(self):
        """Convert a float tile with ``convert_data_type(INT32)`` and check the dtype.

        The tile is keyed by ``self.projected_extent``. The locally built
        extent/EPSG dictionaries of the earlier version were never used and
        have been removed.
        """
        arr = np.array([[0.4324323432124, 0.0, 0.0],
                        [1.0, 1.0, 1.0]], dtype=float)

        tile = {'data': arr, 'no_data_value': float('nan')}
        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

        converted = raster_rdd.convert_data_type(INT32)
        converted_arr = converted.to_numpy_rdd().first()[1]['data']

        # NOTE(review): INT32 is requested but ``np.int64`` is asserted; this
        # expectation is kept as-is -- confirm it matches the deserializer.
        self.assertEqual(converted_arr.dtype, np.int64)
Beispiel #7
0
    def test_if_working(self):
        """Smoke test: build a PNG pyramid from a tiled layer.

        There are no assertions; the test passes as long as every call,
        ending with ``PngRDD.makePyramid``, completes without raising.
        """
        arr = np.zeros((1, 16, 16))
        epsg_code = 3857
        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 10.0, 'ymax': 10.0}

        tile = {'data': arr, 'no_data_value': False}
        projected_extent = {'extent': extent, 'epsg': epsg_code}

        rdd = BaseTestClass.geopysc.pysc.parallelize([(projected_extent, tile)
                                                      ])
        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL,
                                              rdd)

        laid_out = raster_rdd.to_tiled_layer()

        # The return value is not inspected, so it is not bound to a name.
        PngRDD.makePyramid(laid_out, HOT)
Beispiel #8
0
    def test_ignore_no_data_floats(self):
        """Reclassify a float tile whose diagonal is NaN (the no-data value).

        Ones are mapped to 0.0 and no-data cells are replaced with 1.0, so
        the result is expected to equal the 4x4 identity matrix.
        """
        context = BaseTestClass.geopysc
        cells = np.ones((1, 4, 4))
        np.fill_diagonal(cells[0], float('nan'))
        nan_tile = {'data': cells, 'no_data_value': float('nan')}
        records = [(self.projected_extent, nan_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        reclassified = layer.reclassify({1.0: 0.0}, float,
                                        replace_nodata_with=1.0)
        first_record = reclassified.to_numpy_rdd().first()
        output = first_record[1]['data']

        self.assertTrue((output == np.identity(4)).all())
Beispiel #9
0
    def test_ignore_no_data_ints(self):
        """Reclassify an int tile whose diagonal holds ``NODATAINT``.

        Ones are mapped to 0 and no-data cells are replaced with 1, so the
        result is expected to equal the 16x16 integer identity matrix.
        """
        context = BaseTestClass.geopysc
        cells = np.ones((1, 16, 16), int)
        np.fill_diagonal(cells[0], NODATAINT)
        int_tile = {'data': cells, 'no_data_value': NODATAINT}
        records = [(self.projected_extent, int_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        reclassified = layer.reclassify({1: 0}, int, replace_nodata_with=1)
        first_record = reclassified.to_numpy_rdd().first()
        output = first_record[1]['data']

        self.assertTrue((output == np.identity(16, int)).all())
    def test_persist(self):
        """Check that ``is_cached`` follows ``persist`` and ``unpersist``.

        The flag must be ``False`` initially, ``True`` after
        ``persist(StorageLevel.MEMORY_ONLY)`` and ``False`` again after
        ``unpersist()``.
        """
        context = BaseTestClass.geopysc
        cells = np.array([[[value] * 4] for value in (1, 2, 3, 4)], dtype=int)
        int_tile = {'data': cells, 'no_data_value': -500}
        records = [(self.projected_extent, int_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        self.assertEqual(layer.is_cached, False)

        layer.persist(StorageLevel.MEMORY_ONLY)
        self.assertEqual(layer.is_cached, True)

        layer.unpersist()
        self.assertEqual(layer.is_cached, False)
Beispiel #11
0
    def test_no_data_floats(self):
        """Reclassify every 0.0 cell to NaN and expect an all-NaN result."""
        arr = np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0],
                         [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]],
                       dtype=float)
        tile = {'data': arr, 'no_data_value': float('nan')}

        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent,
                                                       tile)])
        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL,
                                              rdd)

        value_map = {0.0: float('nan')}

        result = raster_rdd.reclassify(value_map,
                                       float).to_numpy_rdd().first()[1]['data']

        # NaN never compares equal to itself, so test with ``np.isnan``; the
        # vectorised check replaces a per-element loop over a copied list.
        self.assertTrue(np.isnan(result).all())
    def test_wrong_cols_and_rows(self):
        """Expect ``pyramid`` to raise ``ValueError`` for a 250-cell tile layout.

        A ``(1, 250, 250)`` tile is laid out with ``tile_size=250`` and
        pyramiding from zoom 12 down to zoom 1 must fail with ``ValueError``.
        """
        context = BaseTestClass.geopysc
        key = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}
        value = {'data': np.zeros((1, 250, 250)), 'no_data_value': False}

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize([(key, value)]))

        layer_metadata = layer.collect_metadata(tile_size=250)
        tiled = layer.tile_to_layout(layer_metadata)

        with pytest.raises(ValueError):
            tiled.pyramid(start_zoom=12, end_zoom=1)
Beispiel #13
0
    def test_multibands(self):
        """Reclassify a ``(4, 1, 4)`` integer array with ``{3: 10, 4: 20}``.

        Cells holding 1, 2 or 3 are expected to become 10 and cells holding 4
        are expected to become 20.
        """
        context = BaseTestClass.geopysc
        cells = np.array([[[value] * 4] for value in (1, 2, 3, 4)], dtype=int)
        int_tile = {'data': cells, 'no_data_value': -500}
        records = [(self.projected_extent, int_tile)]

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize(records))

        reclassified = layer.reclassify({3: 10, 4: 20}, int)
        first_record = reclassified.to_numpy_rdd().first()
        output = first_record[1]['data']

        wanted = np.array([[[value] * 4] for value in (10, 10, 10, 20)],
                          dtype=int)

        self.assertTrue((output == wanted).all())
Beispiel #14
0
    def test_floating_voint_ranges(self):
        """Reclassify a float tile with the ``LESSTHAN`` boundary strategy.

        With the break map ``{2.0: 5.0}``, the three rows holding 0.0, 1.0 and
        1.5 are expected to become 5.0, while the row holding 2.0 is expected
        to come back as NaN.
        """
        arr = np.array([[[0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0],
                         [1.5, 1.5, 1.5, 1.5], [2.0, 2.0, 2.0, 2.0]]],
                       dtype=float)

        tile = {'data': arr, 'no_data_value': float('nan')}
        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent,
                                                       tile)])
        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL,
                                              rdd)

        value_map = {2.0: 5.0}

        result = raster_rdd.reclassify(
            value_map, float, LESSTHAN).to_numpy_rdd().first()[1]['data']

        expected = np.array([[[5.0, 5.0, 5.0, 5.0], [5.0, 5.0, 5.0, 5.0],
                              [5.0, 5.0, 5.0, 5.0]]],
                            dtype=float)

        # Compare the first three rows as a block. Indexing ``result[0, 2]``
        # would select only row 2 and broadcast it against ``expected``,
        # leaving rows 0 and 1 unchecked.
        self.assertTrue((result[:, :3] == expected).all())
        # NaN never compares equal to itself, so the last row needs isnan.
        self.assertTrue(np.isnan(result[0, 3]).all())
    def test_correct_base(self):
        """Build a pyramid from zoom 5 down to zoom 1 and validate it.

        The layer is tiled using metadata collected for an explicit extent
        and a ``TileLayout(32, 32, 16, 16)``; the resulting pyramid is handed
        to ``self.pyramid_building_check``.
        """
        context = BaseTestClass.geopysc
        key = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}
        value = {'data': np.zeros((1, 16, 16)), 'no_data_value': False}

        layer = RasterRDD.from_numpy_rdd(
            context, SPATIAL, context.pysc.parallelize([(key, value)]))

        layout = TileLayout(32, 32, 16, 16)
        bound = 20037508.342789244
        target_extent = Extent(-bound, -bound, bound, bound)

        layer_metadata = layer.collect_metadata(extent=target_extent,
                                                layout=layout)
        tiled = layer.tile_to_layout(layer_metadata)

        pyramided = tiled.pyramid(start_zoom=5, end_zoom=1)

        self.pyramid_building_check(pyramided)
Beispiel #16
0
def get(geopysc, rdd_type, uri, options=None, **kwargs):
    """Creates a ``RasterRDD`` from GeoTiffs that are located on the local file system, ``HDFS``,
    or ``S3``.

    Args:
        geopysc (geopyspark.GeoPyContext): The ``GeoPyContext`` being used this session.
        rdd_type (str): What the spatial type of the geotiffs are. This is
            represented by the constants: ``SPATIAL`` and ``SPACETIME``.

            Note:
                All of the GeoTiffs must have the same spatial type.
        uri (str or [str]): The path to a given file/directory. A list of paths is passed
            through unchanged; any other value is wrapped in a single-element list.
        options (dict, optional): A dictionary of different options that are used
            when creating the RDD. This defaults to ``None``. If ``None``, then the
            RDD will be created using the default options for the given backend
            in GeoTrellis.

            Note:
                Key values in the ``dict`` should be in camel case, as this is the style that is
                used in Scala.

            These are the options when using the local file system or ``HDFS``:
                * **crs** (str, optional): The CRS that the output tiles should be
                    in. The CRS must be in the well-known name format. If ``None``,
                    then the CRS that the tiles were originally in will be used.
                * **timeTag** (str, optional): The name of the tiff tag that contains
                    the time stamp for the tile. If ``None``, then the default value
                    is: ``TIFFTAG_DATETIME``.
                * **timeFormat** (str, optional): The pattern of the time stamp for
                    java.time.format.DateTimeFormatter to parse. If ``None``,
                    then the default value is: ``yyyy:MM:dd HH:mm:ss``.
                * **maxTileSize** (int, optional): The max size of each tile in the
                    resulting RDD. If the size is smaller than a read in tile,
                    then that tile will be broken into tiles of the specified
                    size. If ``None``, then the whole tile will be read in.
                * **numPartitions** (int, optional): The number of repartitions Spark
                    will make when the data is repartitioned. If ``None``, then the
                    data will not be repartitioned.
                * **chunkSize** (int, optional): How many bytes of the file should be
                    read in at a time. If ``None``, then files will be read in 65536
                    byte chunks.

            ``S3`` has the above options in addition to this:
                * **s3Client** (str, optional): Which ``S3Client`` to use when reading
                    GeoTiffs. There are currently two options: ``default`` and
                    ``mock``. If ``None``, ``default`` is used.

                    Note:
                        ``mock`` should only be used in unit tests and debugging.

        **kwargs: Option parameters can also be entered as keyword arguments.

    Note:
        Defining both ``options`` and ``kwargs`` will cause the ``kwargs`` to be ignored in favor
        of ``options``.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.RasterRDD`
    """

    geotiff_rdd = geopysc._jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    key = geopysc.map_key_input(rdd_type, False)

    # A non-empty ``options`` wins over ``kwargs``; when neither is given an
    # empty dict is passed so the backend falls back to its defaults.
    options = options or kwargs or {}

    # The JVM side always receives a list of paths.
    uris = uri if isinstance(uri, list) else [uri]

    srdd = geotiff_rdd.get(geopysc.sc, key, uris, options)

    return RasterRDD(geopysc, rdd_type, srdd)