Example #1
0
class UnionTemporalTest(BaseTestClass):
    """Tests ``union`` on SPACETIME raster layers and on their tiled versions.

    Two single-record layers are built at class-definition time. They share
    the same extent, CRS code and tile, and differ only in their timestamp.
    """

    time_1 = datetime.datetime.strptime("1993-09-19T07:01:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-09-19T07:01:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    temp_projected_extent_1 = TemporalProjectedExtent(extent, time_1,
                                                      epsg_code)
    temp_projected_extent_2 = TemporalProjectedExtent(extent, time_2,
                                                      epsg_code)

    arr = np.zeros((1, 16, 16))
    tile = Tile(arr, 'FLOAT', -500.0)

    rdd_1 = BaseTestClass.pysc.parallelize([(temp_projected_extent_1, tile)])
    rdd_2 = BaseTestClass.pysc.parallelize([(temp_projected_extent_2, tile)])

    layer_1 = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd_1)
    layer_2 = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd_2)

    tiled_layer_1 = layer_1.tile_to_layout(GlobalLayout())
    tiled_layer_2 = layer_2.tile_to_layout(GlobalLayout())

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the Spark context's gateway once each test has finished."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_union_of_raster_layers(self):
        """The union of the two untiled layers holds both input records."""
        result = union(self.layer_1, self.layer_2)

        # assertEqual, not assertTrue: assertTrue(x, 2) would treat 2 as the
        # failure message and pass for any non-zero count.
        self.assertEqual(result.srdd.rdd().count(), 2)

    def test_union_of_tiled_raster_layers(self):
        """The union of the tiled layers has two records and merged bounds."""
        result = union(self.tiled_layer_1, self.tiled_layer_2)

        bounds_1 = self.tiled_layer_1.layer_metadata.bounds
        bounds_2 = self.tiled_layer_2.layer_metadata.bounds

        # Expected bounds: component-wise min of the min keys and
        # component-wise max of the max keys.
        min_col = min(bounds_1.minKey.col, bounds_2.minKey.col)
        min_row = min(bounds_1.minKey.row, bounds_2.minKey.row)
        min_instant = min(bounds_1.minKey.instant, bounds_2.minKey.instant)

        max_col = max(bounds_1.maxKey.col, bounds_2.maxKey.col)
        max_row = max(bounds_1.maxKey.row, bounds_2.maxKey.row)
        max_instant = max(bounds_1.maxKey.instant, bounds_2.maxKey.instant)

        min_key = SpaceTimeKey(min_col, min_row, min_instant)
        max_key = SpaceTimeKey(max_col, max_row, max_instant)

        # assertEqual so the record count is actually checked.
        self.assertEqual(result.srdd.rdd().count(), 2)
        self.assertEqual(result.layer_metadata.bounds,
                         Bounds(min_key, max_key))
Example #2
0
    def test_temporal_projected_extent(self):
        """Every key collected from a SPACETIME layer is one of the keys it was built from."""
        # One temporal key per extent; all four share the same CRS and instant.
        pes = [
            TemporalProjectedExtent(extent=self.extents[index],
                                    epsg=self.crs,
                                    instant=self.time)
            for index in range(4)
        ]

        pe_layer = [(key, self.tile) for key in pes]

        layer = RasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                           self.pysc.parallelize(pe_layer))

        for collected_key in layer.collect_keys():
            self.assertTrue(collected_key in pes)
Example #3
0
    def test_local_pyramid(self):
        """Pyramids built from ``LocalLayout`` layers have the expected zooms and column counts."""
        cells = np.zeros((1, 250, 250))
        key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
        source = BaseTestClass.pysc.parallelize([(key, Tile(cells, 'FLOAT', None))])

        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, source)

        # A 250x250 raster cut into 250-pixel tiles: the pyramid tops out at zoom 0.
        single_tile_pyramid = raster_rdd.tile_to_layout(LocalLayout(250)).pyramid()
        assert single_tile_pyramid.max_zoom == 0

        # The same raster cut into 25-pixel tiles.
        pyramid = raster_rdd.tile_to_layout(LocalLayout(25)).pyramid()
        assert pyramid.max_zoom == 4

        # (zoom level, expected number of layout columns), checked top-down.
        expected_columns = ((4, 10), (3, 5), (2, 3), (1, 2), (0, 1))
        for zoom, columns in expected_columns:
            assert pyramid.levels[zoom].layer_metadata.tile_layout.layoutCols == columns
Example #4
0
def combine_bands(layers):
    r"""Combines the bands of values that share the same key in two or more ``TiledRasterLayer``\s.

    This method will concat the bands of two or more values with the same key. For example,
    ``layer a`` has values that have 2 bands and ``layer b`` has values with 1 band. When
    ``combine_bands`` is used on both of these layers, then the resulting layer will have
    values with 3 bands, 2 from ``layer a`` and 1 from ``layer b``.

    Note:
        All layers must have the same ``layer_type``. If the layers are ``TiledRasterLayer``\s,
        then all of the layers must also have the same :class:`~geopyspark.geotrellis.TileLayout`
        and ``CRS``.

    Args:
        layers ([:class:`~geopyspark.RasterLayer`] or [:class:`~geopyspark.TiledRasterLayer`] or (:class:`~geopyspark.RasterLayer`) or (:class:`~geopyspark.TiledRasterLayer`)): A
            collection of two or more ``RasterLayer``\s or ``TiledRasterLayer``\s. **The order of the
            layers determines the order in which the bands are concatenated**. With the bands being
            ordered based on the position of their respective layer.

            For example, the first layer in ``layers`` is ``layer a`` which contains 2 bands and
            the second layer is ``layer b`` whose values have 1 band. The resulting layer will
            have values with 3 bands: the first 2 are from ``layer a`` and the third from ``layer b``.
            If the positions of ``layer a`` and ``layer b`` are reversed, then the resulting values'
            first band will be from ``layer b`` and the last 2 will be from ``layer a``.

    Returns:
        :class:`~geopyspark.RasterLayer` or :class:`~geopyspark.TiledRasterLayer`

    Raises:
        ValueError: If fewer than two layers are given.
    """

    # Reject empty collections as well as single layers; an empty collection
    # would otherwise fail on ``layers[0]`` with an unhelpful IndexError.
    if len(layers) < 2:
        raise ValueError(
            "combine_bands can only be performed on 2 or more layers")

    base_layer = layers[0]
    base_layer_type = base_layer.layer_type

    check_layers(base_layer, base_layer_type, layers)

    pysc = get_spark_context()
    jvm_package = pysc._gateway.jvm.geopyspark.geotrellis

    # The wrapped JVM layers, in the caller's order (this fixes the band order).
    srdds = [x.srdd for x in layers]
    is_spatial = base_layer_type == LayerType.SPATIAL

    if isinstance(base_layer, RasterLayer):
        if is_spatial:
            jvm_layer = jvm_package.ProjectedRasterLayer
        else:
            jvm_layer = jvm_package.TemporalRasterLayer

        result = jvm_layer.combineBands(pysc._jsc.sc(), srdds)
        return RasterLayer(base_layer_type, result)

    if is_spatial:
        jvm_layer = jvm_package.SpatialTiledRasterLayer
    else:
        jvm_layer = jvm_package.TemporalTiledRasterLayer

    result = jvm_layer.combineBands(pysc._jsc.sc(), srdds)
    return TiledRasterLayer(base_layer_type, result)
Example #5
0
    def test_all_zeros(self):
        """``get_min_max`` of an all-zero integer tile is ``(0.0, 0.0)``."""
        cells = np.zeros((1, 16, 16)).astype('int')
        zero_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, zero_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        self.assertEqual((0.0, 0.0), raster_rdd.get_min_max())
Example #6
0
class UnionSpatialTest(BaseTestClass):
    """Tests ``union`` on SPATIAL raster layers and on their tiled versions.

    Two single-record layers are built at class-definition time. They share
    the same CRS code and tile but use different extents.
    """

    projected_extent_1 = ProjectedExtent(extent, epsg_code)
    projected_extent_2 = ProjectedExtent(extent_2, epsg_code)

    arr = np.zeros((1, 16, 16))
    tile = Tile(arr, 'FLOAT', -500.0)

    rdd_1 = BaseTestClass.pysc.parallelize([(projected_extent_1, tile)])
    rdd_2 = BaseTestClass.pysc.parallelize([(projected_extent_2, tile)])

    layer_1 = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd_1)
    layer_2 = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd_2)

    tiled_layer_1 = layer_1.tile_to_layout(GlobalLayout())
    tiled_layer_2 = layer_2.tile_to_layout(GlobalLayout())

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the Spark context's gateway once each test has finished."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_union_of_raster_layers(self):
        """The union of the two untiled layers holds both input records."""
        result = union(self.layer_1, self.layer_2)

        # assertEqual, not assertTrue: assertTrue(x, 2) would treat 2 as the
        # failure message and pass for any non-zero count.
        self.assertEqual(result.srdd.rdd().count(), 2)

    def test_union_of_tiled_raster_layers(self):
        """The union of the tiled layers has two records and merged bounds."""
        result = union(self.tiled_layer_1, self.tiled_layer_2)

        bounds_1 = self.tiled_layer_1.layer_metadata.bounds
        bounds_2 = self.tiled_layer_2.layer_metadata.bounds

        # Expected bounds: component-wise min of the min keys and
        # component-wise max of the max keys.
        min_col = min(bounds_1.minKey.col, bounds_2.minKey.col)
        min_row = min(bounds_1.minKey.row, bounds_2.minKey.row)
        max_col = max(bounds_1.maxKey.col, bounds_2.maxKey.col)
        max_row = max(bounds_1.maxKey.row, bounds_2.maxKey.row)

        min_key = SpatialKey(min_col, min_row)
        max_key = SpatialKey(max_col, max_row)

        # assertEqual so the record count is actually checked.
        self.assertEqual(result.srdd.rdd().count(), 2)
        self.assertEqual(result.layer_metadata.bounds,
                         Bounds(min_key, max_key))
Example #7
0
    def test_multibands(self):
        """``get_min_max`` of a four-band tile holding 1..4 is ``(1.0, 4.0)``."""
        # Four bands, each a single row of four cells filled with the band number.
        cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
        banded_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, banded_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        self.assertEqual((1.0, 4.0), raster_rdd.get_min_max())
Example #8
0
    def test_floating(self):
        """``get_min_max`` of a float tile with rows 0.0, 1.0, 1.5, 2.0 is ``(0.0, 2.0)``."""
        # One band; each row repeats a single value four times.
        cells = np.array([[[value] * 4 for value in (0.0, 1.0, 1.5, 2.0)]],
                         dtype=float)
        float_tile = Tile(cells, 'FLOAT', float('nan'))

        records = [(self.projected_extent, float_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        self.assertEqual((0.0, 2.0), raster_rdd.get_min_max())
Example #9
0
class CombineSpatialBandsTest(BaseTestClass):
    """Tests ``combine_bands`` on three SPATIAL layers, both untiled and tiled."""

    # Raw records for each of the three source tiles.
    layer_1, layer_2, layer_3 = [
        create_spatial_layer(source_tile)
        for source_tile in (tile_1, tile_2, tile_3)
    ]

    r1, r2, r3 = [
        BaseTestClass.pysc.parallelize(records)
        for records in (layer_1, layer_2, layer_3)
    ]

    rdd_1, rdd_2, rdd_3 = [
        RasterLayer.from_numpy_rdd(LayerType.SPATIAL, numpy_rdd)
        for numpy_rdd in (r1, r2, r3)
    ]

    tiled_rdd_1, tiled_rdd_2, tiled_rdd_3 = [
        raster_layer.tile_to_layout(LocalLayout(5, 5))
        for raster_layer in (rdd_1, rdd_2, rdd_3)
    ]

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the Spark context's gateway once each test has finished."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_combine_bands_raster_layers(self):
        """Untiled layers combined as 1, 2, 3 yield bands first, second, third."""
        combined = combine_bands([self.rdd_1, self.rdd_2, self.rdd_3])
        actual = combined.to_numpy_rdd().values().collect()

        for combined_tile in actual:
            self.assertEqual(combined_tile.cells.shape, (3, 5, 5))
            for band, expected in enumerate((first, second, third)):
                self.assertTrue((combined_tile.cells[band, :, :] == expected).all())

    def test_combine_bands_tiled_layers(self):
        """Tiled layers combined as 3, 2, 1 yield bands third, second, first."""
        combined = combine_bands(
            [self.tiled_rdd_3, self.tiled_rdd_2, self.tiled_rdd_1])
        actual = combined.to_numpy_rdd().values().collect()

        for combined_tile in actual:
            self.assertEqual(combined_tile.cells.shape, (3, 5, 5))
            for band, expected in enumerate((third, second, first)):
                self.assertTrue((combined_tile.cells[band, :, :] == expected).all())
Example #10
0
    def test_collection_python_rdd(self):
        """Metadata collected with an explicit layout matches the fixture's extent and layout."""
        # Open the dataset in a ``with`` block so its file handle is released
        # as soon as the cells and nodata value have been read.
        with rasterio.open(self.path) as data:
            tile_dict = Tile(data.read(), 'FLOAT', data.nodata)

        rasterio_rdd = self.pysc.parallelize([(self.projected_extent, tile_dict)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rasterio_rdd)

        result = raster_rdd.collect_metadata(layout=self.layout_def)

        self.assertEqual(result.extent, self.extent)
        self.assertEqual(result.layout_definition.extent, self.extent)
        self.assertEqual(result.layout_definition.tileLayout, self.layout)
Example #11
0
    def test_list_bad(self):
        """``reclassify`` with a string key in the value map raises ``TypeError``."""
        cells = np.zeros((1, 16, 16))
        zero_tile = Tile(cells, 'FLOAT', -500)

        records = [(self.projected_extent, zero_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        value_map = {'apple, orange, banana': 1}

        with pytest.raises(TypeError):
            # The whole chain stays inside the context manager, as before.
            reclassified = raster_rdd.reclassify(value_map, int)
            _ = reclassified.to_numpy_rdd().first()[1].cells
Example #12
0
    def test_all_zeros(self):
        """Reclassifying an all-zero tile with ``{0: 1}`` yields all ones."""
        cells = np.zeros((1, 16, 16))
        zero_tile = Tile(cells, 'FLOAT', -500)

        records = [(self.projected_extent, zero_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({0: 1}, int)
        result = reclassified.to_numpy_rdd().first()[1].cells

        self.assertTrue((result == 1).all())
Example #13
0
    def test_miscellaneous(self):
        """Checks ``count``, partition count, wrapped RDDs and ``str``/``repr`` agreement."""
        # Four bands, each a single row of four cells filled with the band number.
        cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
        banded_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, banded_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        self.assertEqual(raster_rdd.count(), 1)
        self.assertGreaterEqual(raster_rdd.getNumPartitions(), 1)
        self.assertGreaterEqual(len(raster_rdd.wrapped_rdds()), 1)
        self.assertEqual(str(raster_rdd), repr(raster_rdd))
Example #14
0
    def test_no_data_ints(self):
        """Reclassifying zeros to ``NO_DATA_INT`` fills the integer tile with ``NO_DATA_INT``."""
        cells = np.zeros((1, 16, 16), dtype=int)
        zero_tile = Tile(cells, 'INT', NO_DATA_INT)

        records = [(self.projected_extent, zero_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({0: NO_DATA_INT}, int)
        result = reclassified.to_numpy_rdd().first()[1].cells

        self.assertTrue((result == NO_DATA_INT).all())
Example #15
0
    def test_cache(self):
        """``is_cached`` is ``False`` on a fresh layer and ``True`` after ``cache()``."""
        # Four bands, each a single row of four cells filled with the band number.
        cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
        banded_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, banded_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        self.assertEqual(raster_rdd.is_cached, False)

        raster_rdd.cache()

        self.assertEqual(raster_rdd.is_cached, True)
Example #16
0
    def test_ignore_no_data_floats(self):
        """With ``replace_nodata_with=1.0`` the NaN diagonal becomes 1 and the ones become 0."""
        nan = float('nan')

        # 4x4 of ones with NaN (the tile's nodata value) along the diagonal.
        cells = np.ones((1, 4, 4))
        np.fill_diagonal(cells[0], nan)
        source_tile = Tile(cells, 'FLOAT', nan)

        records = [(self.projected_extent, source_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({1.0: 0.0}, float,
                                             replace_nodata_with=1.0)
        result = reclassified.to_numpy_rdd().first()[1].cells

        self.assertTrue((result == np.identity(4)).all())
Example #17
0
    def test_ignore_no_data_ints(self):
        """With ``replace_nodata_with=1`` the nodata diagonal becomes 1 and the ones become 0."""
        # 16x16 of ones with NO_DATA_INT (the tile's nodata value) along the diagonal.
        cells = np.ones((1, 16, 16), int)
        np.fill_diagonal(cells[0], NO_DATA_INT)
        source_tile = Tile(cells, 'INT', NO_DATA_INT)

        records = [(self.projected_extent, source_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({1: 0}, int,
                                             replace_nodata_with=1)
        result = reclassified.to_numpy_rdd().first()[1].cells

        self.assertTrue((result == np.identity(16, int)).all())
Example #18
0
    def test_no_data_floats(self):
        """Reclassifying zeros to NaN leaves every cell of the float tile NaN."""
        nan = float('nan')

        # One band of 4x4 zeros.
        cells = np.zeros((1, 4, 4), dtype=float)
        zero_tile = Tile(cells, 'FLOAT', nan)

        records = [(self.projected_extent, zero_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({0.0: nan}, float)
        result = reclassified.to_numpy_rdd().first()[1].cells

        # NaN never compares equal to itself, so each cell is checked with isnan.
        for cell in result.flatten():
            self.assertTrue(math.isnan(cell))
    def test_to_ud_ubyte(self):
        """Converting to ``UINT8`` with ``no_data_value=-1`` reports -1 as the tile's nodata value."""
        cells = np.array([[0.4324323432124, 0.0, 0.0], [1.0, 1.0, 1.0]],
                         dtype=float)

        key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
        float_tile = Tile(cells, 'FLOAT', float('nan'))

        source = BaseTestClass.pysc.parallelize([(key, float_tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, source)

        converted = raster_rdd.convert_data_type(CellType.UINT8,
                                                 no_data_value=-1)

        # Records are (key, tile) pairs; take the tile of the first one.
        converted_tile = converted.to_numpy_rdd().first()[1]

        self.assertEqual(converted_tile.no_data_value, -1)
Example #20
0
    def test_correct_base(self):
        """Pyramid a layer tiled with a 16-pixel ``GlobalLayout`` and run the shared pyramid checks."""
        arr = np.zeros((1, 16, 16))
        epsg_code = 3857
        extent = Extent(0.0, 0.0, 10.0, 10.0)

        tile = Tile(arr, 'FLOAT', False)
        projected_extent = ProjectedExtent(extent, epsg_code)

        rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        # The layout is derived by GlobalLayout; the hand-built TileLayout /
        # LayoutDefinition that used to be constructed here was never used.
        laid_out = raster_rdd.tile_to_layout(GlobalLayout(tile_size=16))
        result = laid_out.pyramid()
        self.pyramid_building_check(result)
Example #21
0
    def test_projected_extent(self):
        """``merge`` collapses records sharing a key: two keys remain, each equal to ``arr_2``."""
        pes = [
            ProjectedExtent(extent=self.extents[index], epsg=self.crs)
            for index in range(2)
        ]

        # Each key appears twice: once with tile_1, then with tile_2.
        pe_layer = [(key, tile)
                    for key in pes
                    for tile in (self.tile_1, self.tile_2)]

        layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL,
                                           self.pysc.parallelize(pe_layer))

        actual = layer.merge()

        self.assertEqual(actual.srdd.rdd().count(), 2)

        for _, merged_tile in actual.to_numpy_rdd().collect():
            self.assertTrue((merged_tile.cells == self.arr_2).all())
Example #22
0
    def test_various_values(self):
        """Reclassifying rows 1..4 with ``{1: 10, 3: 17}`` gives rows 10, 17, 17, -500."""
        # One band; each row repeats a single value four times.
        cells = np.array([[[value] * 4 for value in (1, 2, 3, 4)]], dtype=int)
        source_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, source_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify({1: 10, 3: 17}, int)
        result = reclassified.to_numpy_rdd().first()[1].cells

        expected = np.array([[[value] * 4 for value in (10, 17, 17, -500)]],
                            dtype=int)

        self.assertTrue((result == expected).all())
    def test_no_data_deserialization(self):
        """A SHORT tile made only of its nodata value survives the round trip through a layer."""
        # One band of 4x4 int16 cells, every one equal to the nodata value.
        cells = np.full((1, 4, 4), -32768, dtype=np.int16)

        key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
        tile = Tile(cells, 'SHORT', -32768)

        source = BaseTestClass.pysc.parallelize([(key, tile)])
        raster_layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, source)

        # Records are (key, tile) pairs; take the tile of the first one.
        actual_tile = raster_layer.to_numpy_rdd().first()[1]

        self.assertEqual(actual_tile.cell_type, tile.cell_type)
        self.assertEqual(actual_tile.no_data_value, tile.no_data_value)
        self.assertTrue((actual_tile.cells == tile.cells).all())
Example #24
0
    def test_pyramid_class(self):
        """Checks the max zoom, level keys and histogram of a pyramid built from an all-zero layer."""
        arr = np.zeros((1, 16, 16))
        epsg_code = 3857
        extent = Extent(0.0, 0.0, 10.0, 10.0)

        tile = Tile(arr, 'FLOAT', False)
        projected_extent = ProjectedExtent(extent, epsg_code)

        rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        # The layout is derived by GlobalLayout; the hand-built TileLayout that
        # used to be constructed here was never used.
        reprojected = raster_rdd.tile_to_layout(layout=GlobalLayout(tile_size=16), target_crs=3857)

        result = reprojected.pyramid()
        hist = result.get_histogram()

        self.assertEqual(result.max_zoom, reprojected.zoom_level)
        # Levels 1 through 12 must all be present (further levels are allowed).
        self.assertTrue(set(result.levels.keys()).issuperset(range(1, 13)))
        self.assertEqual(hist.mean(), 0.0)
        self.assertEqual(hist.min_max(), (0.0, 0.0))
Example #25
0
    def test_ranges(self):
        """``GREATER_THAN`` with ``{2: 20}`` maps rows 3 and 4 to 20 and rows 1 and 2 to -500."""
        # One band; each row repeats a single value four times.
        cells = np.array([[[value] * 4 for value in (1, 2, 3, 4)]], dtype=int)
        source_tile = Tile(cells, 'INT', -500)

        records = [(self.projected_extent, source_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify(
            {2: 20}, int, ClassificationStrategy.GREATER_THAN)
        result = reclassified.to_numpy_rdd().first()[1].cells

        expected = np.array([[[value] * 4 for value in (-500, -500, 20, 20)]],
                            dtype=int)

        self.assertTrue((result == expected).all())
Example #26
0
class WithNoDataTest(BaseTestClass):
    """Tests ``with_no_data`` on an untiled layer and on its tiled version."""

    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)
    projected_extent = ProjectedExtent(extent, epsg_code)

    arr = np.zeros((1, 16, 16))
    tile = Tile(arr, 'FLOAT', -500.0)

    rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])

    layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
    tiled_layer = layer.tile_to_layout()

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the Spark context's gateway once each test has finished."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_with_no_data_raster_layers(self):
        """The new nodata value shows up on the tiles and in the collected metadata."""
        updated_layer = self.layer.with_no_data(-10)

        # Records are (key, tile) pairs; take the tile of the first one.
        first_tile = updated_layer.to_numpy_rdd().first()[1]
        self.assertEqual(first_tile.no_data_value, -10)

        collected = updated_layer.collect_metadata()
        self.assertEqual(collected.cell_type, "float32ud-10.0")
        self.assertEqual(collected.no_data_value, -10)

    def test_with_no_data_tiled_raster_layers(self):
        """The new nodata value shows up on the tiles and in ``layer_metadata``."""
        updated_layer = self.tiled_layer.with_no_data(18)

        # Records are (key, tile) pairs; take the tile of the first one.
        first_tile = updated_layer.to_numpy_rdd().first()[1]
        self.assertEqual(first_tile.no_data_value, 18)

        layer_metadata = updated_layer.layer_metadata
        self.assertEqual(layer_metadata.cell_type, "float32ud18.0")
        self.assertEqual(layer_metadata.no_data_value, 18)
Example #27
0
    def test_floating_voint_ranges(self):
        """``LESS_THAN`` with ``{2.0: 5.0}``: row 2 becomes 5.0 and row 3 becomes NaN."""
        nan = float('nan')

        # One band; each row repeats a single value four times.
        cells = np.array([[[value] * 4 for value in (0.0, 1.0, 1.5, 2.0)]],
                         dtype=float)
        source_tile = Tile(cells, 'FLOAT', nan)

        records = [(self.projected_extent, source_tile)]
        raster_rdd = RasterLayer.from_numpy_rdd(
            LayerType.SPATIAL, BaseTestClass.pysc.parallelize(records))

        reclassified = raster_rdd.reclassify(
            {2.0: 5.0}, float, ClassificationStrategy.LESS_THAN)
        result = reclassified.to_numpy_rdd().first()[1].cells

        expected = np.array([[[5.0] * 4 for _ in range(3)]], dtype=float)

        # NOTE(review): only row index 2 of the result is compared here; it is
        # broadcast against the three identical rows of ``expected``. Rows 0
        # and 1 are not checked -- confirm whether ``result[0, :3]`` was meant.
        row_two = result[0, 2, ]
        self.assertTrue((row_two == expected).all())

        for cell in result[0, 3, ]:
            self.assertTrue(math.isnan(cell))
Example #28
0
    def test_pyraminding_with_partitioner(self):
        """Level 0 of a pyramid reports the partition strategy passed to ``pyramid``."""
        arr = np.zeros((1, 16, 16))
        epsg_code = 3857
        extent = Extent(0.0, 0.0, 10.0, 10.0)

        tile = Tile(arr, 'FLOAT', False)
        projected_extent = ProjectedExtent(extent, epsg_code)

        rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        # The layout is derived by GlobalLayout; the hand-built TileLayout /
        # LayoutDefinition that used to be constructed here was never used.
        laid_out = raster_rdd.tile_to_layout(GlobalLayout(tile_size=16))

        strategy = SpatialPartitionStrategy(4)

        pyramided = laid_out.pyramid(partition_strategy=strategy)

        self.assertEqual(pyramided.levels[0].get_partition_strategy(), strategy)
Example #29
0
def get(layer_type,
        uri,
        crs=None,
        max_tile_size=DEFAULT_MAX_TILE_SIZE,
        num_partitions=None,
        chunk_size=DEFAULT_CHUNK_SIZE,
        partition_bytes=DEFAULT_PARTITION_BYTES,
        time_tag=DEFAULT_GEOTIFF_TIME_TAG,
        time_format=DEFAULT_GEOTIFF_TIME_FORMAT,
        delimiter=None,
        s3_client=DEFAULT_S3_CLIENT,
        s3_credentials=None):
    """Creates a ``RasterLayer`` from GeoTiffs that are located on the local file system, ``HDFS``,
    or ``S3``.

    Args:
        layer_type (str or :class:`~geopyspark.geotrellis.constants.LayerType`): What the layer type
            of the geotiffs are. This is represented by either constants within ``LayerType`` or by
            a string.

            Note:
                All of the GeoTiffs must have the same spatial type.

        uri (str or [str]): The path or list of paths to the desired tile(s)/directory(ies).

            Note:
                When a list is given, the URI scheme of its first element is the one
                passed to ``set_s3_credentials``.

        crs (str or int, optional): The CRS that the output tiles should be
            in. If ``None``, then the CRS that the tiles were originally in
            will be used.
        max_tile_size (int or None, optional): The max size of each tile in the
            resulting Layer. If the size is smaller than the read in tile,
            then that tile will be broken into smaller sections of the given
            size. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_MAX_TILE_SIZE`.
            If ``None``, then the whole tile will be read in.
        num_partitions (int, optional): The number of partitions Spark
            will make when the data is repartitioned. If ``None``, then the
            data will not be repartitioned.

            Note:
                If ``max_tile_size`` is also specified then this parameter
                will be ignored.

        partition_bytes (int, optional): The desired number of bytes per
            partition. This will ensure that at least one item is assigned for
            each partition. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_PARTITION_BYTES`.
        chunk_size (int, optional): How many bytes of the file should be
            read in at a time. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_CHUNK_SIZE`.
        time_tag (str, optional): The name of the tiff tag that contains
            the time stamp for the tile.
            Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_TAG`.
        time_format (str, optional): The pattern of the time stamp to be parsed.
            Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_GEOTIFF_TIME_FORMAT`.
        delimiter (str, optional): The delimiter to use for S3 object listings.

            Note:
                This parameter will only be used when reading from S3.

        s3_client (str, optional): Which ``S3Client`` to use when reading
            GeoTiffs from S3. There are currently two options: ``default`` and
            ``mock``. Defaults to :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

        s3_credentials(:class:`~geopyspark.geotrellis.s3.Credentials`, optional): Alternative Amazon S3
            credentials to use when accessing the tile(s).

    Returns:
        :class:`~geopyspark.geotrellis.layer.RasterLayer`

    Raises:
        RuntimeError: ``s3_credentials`` were specified but the specified ``uri`` was not S3-based.
        ValueError: ``uri`` is an empty list.
    """
    # Must remain the first statement: at this point ``locals()`` holds only
    # the call arguments. ``None``-valued arguments are dropped so that they
    # are not forwarded to the JVM reader.
    inputs = {k: v for k, v in locals().items() if v is not None}

    pysc = get_spark_context()
    geotiff_rdd = pysc._gateway.jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD

    # These arguments are passed to the JVM call positionally, so they are
    # taken out of the options mapping.
    key = LayerType(inputs.pop('layer_type'))._key_name(False)
    partition_bytes = str(inputs.pop('partition_bytes'))

    uri = inputs.pop('uri')
    uris = uri if isinstance(uri, list) else [uri]

    if not uris:
        raise ValueError("uri must contain at least one path")

    s3_credentials = inputs.pop('s3_credentials', None)
    if s3_credentials is not None:
        # Validate each path on its own; the previous code handed the raw
        # ``uri`` argument (possibly a list) to the validator.
        for path in uris:
            _validate_s3_credentials(path, s3_credentials)

    # Take the scheme from the first path; calling ``split`` on the raw ``uri``
    # argument fails when a list is given.
    uri_type = uris[0].split(":")[0]

    with set_s3_credentials(s3_credentials, uri_type):
        srdd = geotiff_rdd.get(pysc._jsc.sc(), key, uris, inputs,
                               partition_bytes)

    return RasterLayer(layer_type, srdd)
class BandSelectionTest(BaseTestClass):
    """Exercise ``bands``, ``map_tiles`` and ``map_cells`` on raster layers.

    Every tile in the fixtures carries the same three 5x5 bands, filled with
    1.0, 2.0 and 3.0 respectively, and is built with -1.0 as its third
    ``Tile`` argument.  The checks are run against both a
    ``TiledRasterLayer`` and a ``RasterLayer`` built from those tiles.
    """

    # Constant-valued 5x5 bands; each band is filled with its own number.
    band_1 = np.full((5, 5), 1.0)
    band_2 = np.full((5, 5), 2.0)
    band_3 = np.full((5, 5), 3.0)

    # All three bands stacked along a new leading axis: shape (3, 5, 5).
    bands = np.stack([band_1, band_2, band_3])

    # Four identical tiles keyed on a 2x2 grid of spatial keys.
    layer = [
        (SpatialKey(0, 0), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(1, 0), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(0, 1), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(1, 1), Tile(bands, 'FLOAT', -1.0)),
    ]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {'col': 0, 'row': 0},
            'maxKey': {'col': 1, 'row': 1},
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2,
            },
        },
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(
        LayerType.SPATIAL, rdd, metadata, 5)

    # The same tiles again, this time keyed by projected extents.
    layer2 = [
        (ProjectedExtent(Extent(0, 0, 1, 1), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(1, 0, 2, 1), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(0, 1, 1, 2), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(1, 1, 2, 2), 3857), Tile(bands, 'FLOAT', -1.0)),
    ]
    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Run the test, then close the gateway of the shared context."""
        yield
        BaseTestClass.pysc._gateway.close()

    @staticmethod
    def _first_tile(raster_layer):
        """Return the tile from the first ``(key, tile)`` pair of a layer."""
        first_pair = raster_layer.to_numpy_rdd().first()
        return first_pair[1]

    def _assert_cells_equal(self, expected, tile):
        """Assert that every cell of ``tile`` equals ``expected``."""
        self.assertTrue((expected == tile.cells).all())

    def _assert_bands_shifted_down(self, tile):
        """Assert bands 0, 1, 2 of ``tile`` equal 0.0, band_1, band_2."""
        for index, wanted in enumerate((0.0, self.band_1, self.band_2)):
            self.assertTrue((wanted == tile.cells[index, :]).all())

    def test_bands_invalid(self):
        """Selecting bands with a string is expected to raise TypeError."""
        with pytest.raises(TypeError):
            self._first_tile(self.tiled_raster_rdd.bands("hello"))

    def test_bands_int_tiled(self):
        """An int selector on the tiled layer yields the band at that index."""
        tile = self._first_tile(self.tiled_raster_rdd.bands(1))

        self._assert_cells_equal(self.band_2, tile)

    def test_bands_int_raster(self):
        """An int selector on the raster layer yields the band at that index."""
        tile = self._first_tile(self.raster_rdd.bands(1))

        self._assert_cells_equal(self.band_2, tile)

    def test_bands_tuple_tiled(self):
        """A tuple selector on the tiled layer yields the listed bands."""
        tile = self._first_tile(self.tiled_raster_rdd.bands((1, 2)))
        wanted = np.stack([self.band_2, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_bands_tuple_raster(self):
        """A tuple selector on the raster layer yields the listed bands."""
        tile = self._first_tile(self.raster_rdd.bands((1, 2)))
        wanted = np.stack([self.band_2, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_bands_list_tiled(self):
        """A list selector on the tiled layer yields the listed bands."""
        tile = self._first_tile(self.tiled_raster_rdd.bands([0, 2]))
        wanted = np.stack([self.band_1, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_bands_list_raster(self):
        """A list selector on the raster layer yields the listed bands."""
        tile = self._first_tile(self.raster_rdd.bands([0, 2]))
        wanted = np.stack([self.band_1, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_band_range_tiled(self):
        """A ``range`` selector on the tiled layer yields all three bands."""
        tile = self._first_tile(self.tiled_raster_rdd.bands(range(0, 3)))
        wanted = np.stack([self.band_1, self.band_2, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_band_range_raster(self):
        """A ``range`` selector on the raster layer yields all three bands."""
        tile = self._first_tile(self.raster_rdd.bands(range(0, 3)))
        wanted = np.stack([self.band_1, self.band_2, self.band_3])

        self._assert_cells_equal(wanted, tile)

    def test_map_tiles_func_tiled(self):
        """``map_tiles`` with a named function: (1 + 2) / 3 gives band_1."""
        def combine_bands(tile):
            first, second, third = tile.cells[0], tile.cells[1], tile.cells[2]
            return Tile((first + second) / third,
                        tile.cell_type,
                        tile.no_data_value)

        result = self._first_tile(self.tiled_raster_rdd.map_tiles(combine_bands))
        wanted = np.stack([self.band_1])

        self._assert_cells_equal(wanted, result)

    def test_map_tiles_lambda_tiled(self):
        """``map_tiles`` with a lambda keeps band 0 and the zoom level."""
        mapped_layer = self.tiled_raster_rdd.map_tiles(
            lambda tile: Tile(tile.cells[0], tile.cell_type, tile.no_data_value))
        result = self._first_tile(mapped_layer)
        wanted = np.stack([self.band_1])

        self.assertEqual(mapped_layer.zoom_level, self.tiled_raster_rdd.zoom_level)
        self._assert_cells_equal(wanted, result)

    def test_map_cells_func_raster(self):
        """``map_cells`` on the raster layer: cells >= 3.0 become ``nd``."""
        def blank_high_cells(cells, nd):
            # Overwrite in place, then hand the same array back.
            cells[cells >= 3.0] = nd
            return cells

        result = self._first_tile(self.raster_rdd.map_cells(blank_high_cells))

        # Band 3 is the only one at or above 3.0, so it is all -1.0 afterwards.
        wanted = np.stack([self.band_1, self.band_2, np.full((5, 5), -1.0)])

        self._assert_cells_equal(wanted, result)

    def test_map_cells_lambda_raster(self):
        """``map_cells`` with a lambda adding ``nd`` lowers each band by 1."""
        mapped_layer = self.raster_rdd.map_cells(lambda cells, nd: cells + nd)
        result = self._first_tile(mapped_layer)

        self._assert_bands_shifted_down(result)

    def test_map_cells_func_tiled(self):
        """``map_cells`` on the tiled layer: cells >= 3.0 become ``nd``."""
        def blank_high_cells(cells, nd):
            # Overwrite in place, then hand the same array back.
            cells[cells >= 3.0] = nd
            return cells

        result = self._first_tile(
            self.tiled_raster_rdd.map_cells(blank_high_cells))

        # Band 3 is the only one at or above 3.0, so it is all -1.0 afterwards.
        wanted = np.stack([self.band_1, self.band_2, np.full((5, 5), -1.0)])

        self._assert_cells_equal(wanted, result)

    def test_map_cells_lambda_tiled(self):
        """``map_cells`` with a lambda on the tiled layer keeps the zoom level."""
        mapped_layer = self.tiled_raster_rdd.map_cells(lambda cells, nd: cells + nd)
        result = self._first_tile(mapped_layer)

        self._assert_bands_shifted_down(result)
        self.assertEqual(mapped_layer.zoom_level, self.tiled_raster_rdd.zoom_level)