Example #1
def create_temporal_layer(tile):
    layer = [(TemporalProjectedExtent(e_1, now, proj4=crs), tile),
             (TemporalProjectedExtent(e_2, now, proj4=crs), tile),
             (TemporalProjectedExtent(e_3, now, proj4=crs), tile),
             (TemporalProjectedExtent(e_4, now, proj4=crs), tile)]

    return layer
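
A minimal driving sketch for the snippet above: e_1 through e_4, now, crs, and tile are defined elsewhere in the original module, so the values below are illustrative assumptions, as is the reliance on geopyspark's usual top-level re-exports (gps.Extent, gps.Tile, gps.RasterLayer, gps.LayerType) and geopyspark_conf.

import datetime

import numpy as np
import geopyspark as gps
from pyspark import SparkContext
from geopyspark.geotrellis import TemporalProjectedExtent  # referenced inside create_temporal_layer

# Illustrative stand-ins for the names the snippet takes from its enclosing module.
e_1 = gps.Extent(0.0, 0.0, 1.0, 1.0)
e_2 = gps.Extent(1.0, 0.0, 2.0, 1.0)
e_3 = gps.Extent(0.0, 1.0, 1.0, 2.0)
e_4 = gps.Extent(1.0, 1.0, 2.0, 2.0)
now = datetime.datetime(2016, 8, 24, 9, 0, 0)
crs = '+proj=longlat +datum=WGS84 +no_defs '
tile = gps.Tile.from_numpy_array(np.zeros((1, 16, 16)), no_data_value=-1.0)

pysc = SparkContext(conf=gps.geopyspark_conf(master="local[*]", appName="tpe-example"))
rdd = pysc.parallelize(create_temporal_layer(tile))
layer = gps.RasterLayer.from_numpy_rdd(gps.LayerType.SPACETIME, rdd)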
Example #2
    def test_temporal_projected_extent(self):
        pes = [
            TemporalProjectedExtent(extent=self.extents[0],
                                    epsg=self.crs,
                                    instant=self.time),
            TemporalProjectedExtent(extent=self.extents[1],
                                    epsg=self.crs,
                                    instant=self.time),
            TemporalProjectedExtent(extent=self.extents[2],
                                    epsg=self.crs,
                                    instant=self.time),
            TemporalProjectedExtent(extent=self.extents[3],
                                    epsg=self.crs,
                                    instant=self.time)
        ]

        pe_layer = [(pes[0], self.tile), (pes[1], self.tile),
                    (pes[2], self.tile), (pes[3], self.tile)]

        rdd = self.pysc.parallelize(pe_layer)
        layer = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd)

        actual = layer.collect_keys()

        for x in actual:
            self.assertTrue(x in pes)
Example #3
class TemporalProjectedExtentSchemaTest(BaseTestClass):
    extents = [
        Extent(0.0, 0.0, 1.0, 1.0),
        Extent(1.0, 2.0, 3.0, 4.0),
        Extent(5.0, 6.0, 7.0, 8.0),
    ]

    time = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                      '%Y-%m-%dT%H:%M:%SZ')

    expected_tpextents = [
        TemporalProjectedExtent(epsg=2004, extent=extents[0],
                                instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[1],
                                instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[2],
                                instant=time)._asdict()
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.TemporalProjectedExtentWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(temporal_projected_extent_decoder,
                             temporal_projected_extent_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = [tpex._asdict() for tpex in rdd.collect()]

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_tpe, expected_tpe):
        for actual, expected in zip(actual_tpe, expected_tpe):
            self.assertDictEqual(actual, expected)

    def test_encoded_tpextents(self):
        actual_encoded = [
            temporal_projected_extent_encoder(x) for x in self.rdd.collect()
        ]

        for x in range(0, len(self.expected_tpextents)):
            self.expected_tpextents[x]['extent'] = Extent(
                **self.expected_tpextents[x]['extent'])

        expected_encoded = [
            to_pb_temporal_projected_extent(TemporalProjectedExtent(**ex)).SerializeToString() \
            for ex in self.expected_tpextents
        ]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_tpextents(self):
        self.result_checker(self.collected, self.expected_tpextents)
Example #4
class UnionTemporalTest(BaseTestClass):
    time_1 = datetime.datetime.strptime("1993-09-19T07:01:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-09-19T07:01:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    temp_projected_extent_1 = TemporalProjectedExtent(extent, time_1,
                                                      epsg_code)
    temp_projected_extent_2 = TemporalProjectedExtent(extent, time_2,
                                                      epsg_code)

    arr = np.zeros((1, 16, 16))
    tile = Tile(arr, 'FLOAT', -500.0)

    rdd_1 = BaseTestClass.pysc.parallelize([(temp_projected_extent_1, tile)])
    rdd_2 = BaseTestClass.pysc.parallelize([(temp_projected_extent_2, tile)])

    layer_1 = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd_1)
    layer_2 = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd_2)

    tiled_layer_1 = layer_1.tile_to_layout(GlobalLayout())
    tiled_layer_2 = layer_2.tile_to_layout(GlobalLayout())

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_union_of_raster_layers(self):
        result = union(self.layer_1, self.layer_2)

        self.assertEqual(result.srdd.rdd().count(), 2)

    def test_union_of_tiled_raster_layers(self):
        result = union(self.tiled_layer_1, self.tiled_layer_2)

        bounds_1 = self.tiled_layer_1.layer_metadata.bounds
        bounds_2 = self.tiled_layer_2.layer_metadata.bounds

        min_col = min(bounds_1.minKey.col, bounds_2.minKey.col)
        min_row = min(bounds_1.minKey.row, bounds_2.minKey.row)
        min_instant = min(bounds_1.minKey.instant, bounds_2.minKey.instant)

        max_col = max(bounds_1.maxKey.col, bounds_2.maxKey.col)
        max_row = max(bounds_1.maxKey.row, bounds_2.maxKey.row)
        max_instant = max(bounds_1.maxKey.instant, bounds_2.maxKey.instant)

        min_key = SpaceTimeKey(min_col, min_row, min_instant)
        max_key = SpaceTimeKey(max_col, max_row, max_instant)

        self.assertEqual(result.srdd.rdd().count(), 2)
        self.assertEqual(result.layer_metadata.bounds,
                         Bounds(min_key, max_key))
Example #5
def from_pb_temporal_projected_extent(pb_temporal_projected_extent):
    """Creates a ``TemporalProjectedExtent`` from a ``ProtoTemporalProjectedExtent``.

    Args:
        pb_temporal_projected_extent (ProtoTemporalProjectedExtent): An instance of
            ``ProtoTemporalProjectedExtent``.

    Returns:
        :class:`~geopyspark.geotrellis.TemporalProjectedExtent`
    """

    instant = datetime.datetime.utcfromtimestamp(pb_temporal_projected_extent.instant / 1000)

    if pb_temporal_projected_extent.crs.epsg != 0:
        return TemporalProjectedExtent(extent=from_pb_extent(pb_temporal_projected_extent.extent),
                                       epsg=pb_temporal_projected_extent.crs.epsg,
                                       instant=instant)
    else:
        return TemporalProjectedExtent(extent=from_pb_extent(pb_temporal_projected_extent.extent),
                                       proj4=pb_temporal_projected_extent.crs.proj4,
                                       instant=instant)
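
A minimal round-trip sketch for this decoder, paired with to_pb_temporal_projected_extent from the schema test above; the module path geopyspark.geotrellis.protobufcodecs for these helpers is an assumption.

import datetime

from geopyspark.geotrellis import Extent, TemporalProjectedExtent
from geopyspark.geotrellis.protobufcodecs import (from_pb_temporal_projected_extent,
                                                  to_pb_temporal_projected_extent)

tpe = TemporalProjectedExtent(extent=Extent(0.0, 0.0, 1.0, 1.0),
                              epsg=3857,
                              instant=datetime.datetime(2016, 8, 24, 9, 0, 0))

# Encoding to protobuf and decoding again should preserve extent, CRS, and instant
# (the instant has no sub-millisecond component, so the millisecond timestamp round-trips).
assert from_pb_temporal_projected_extent(to_pb_temporal_projected_extent(tpe)) == tpe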
Example #6
    def test_temporal_projected_extent(self):
        pes = [
            TemporalProjectedExtent(extent=self.extents[0],
                                    epsg=self.crs,
                                    instant=self.time),
            TemporalProjectedExtent(extent=self.extents[1],
                                    epsg=self.crs,
                                    instant=self.time),
        ]

        pe_layer = [(pes[0], self.tile_1), (pes[1], self.tile_1),
                    (pes[0], self.tile_2), (pes[1], self.tile_2)]

        rdd = self.pysc.parallelize(pe_layer)
        layer = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd)

        actual = layer.merge()

        self.assertEqual(actual.srdd.rdd().count(), 2)

        for k, v in actual.to_numpy_rdd().collect():
            self.assertTrue((v.cells == self.arr_2).all())
Example #8
class TestMultipleDates(TestCase):
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0,
                                                     projection="EPSG:3857",
                                                     method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)
        path = str(self.temp_folder / "reprojected.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)

    def test_reduce(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        input = Pyramid({
            0:
            self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                5.0
            })
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        no_data = -1.0

        input = Pyramid({
            0:
            self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    5.0
                }, no_data)
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        #print(stitched)
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(5.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        with self.assertRaises(AttributeError) as context:
            imagecollection.reduce("max",
                                   "spectral").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.aggregate_temporal(
            ["2017-01-01", "2018-01-01"], ["2017-01-03"],
            "max").pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

    def test_max_aggregator(self):
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            'max', 'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        min_time = imagecollection.reduce('min', 'temporal')
        max_time = imagecollection.reduce('max', 'temporal')

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(imagecollection.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_spatiotemporal(self):
        import openeo_udf.functions

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })
        import os, openeo_udf
        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_reduce_time_sum.py")
        with open(file_name, "r") as f:
            udf_code = f.read()

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        stitched = result.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)
        self.assertEqual(2, stitched.cells[0][0][0])
        self.assertEqual(6, stitched.cells[0][0][5])
        self.assertEqual(4, stitched.cells[0][5][6])

    def test_apply_dimension_spatiotemporal(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEqual(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.mask(
            rastermask=GeotrellisTimeSeriesImageCollection(
                mask, InMemoryServiceRegistry()),
            replacement=10.0).reduce('max',
                                     'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(10.0, stitched.cells[0][0][1])

    def test_apply_kernel(self):
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.apply_kernel(kernel, 2.0).reduce(
            'max', 'temporal').pyramid.levels[0].stitch()

        self.assertEqual(12.0, stitched.cells[0][0][0])
        self.assertEqual(16.0, stitched.cells[0][0][1])
        self.assertEqual(20.0, stitched.cells[0][1][1])

    def test_resample_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)
Example #9
class TestMultipleDates(TestCase):
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }
    collection_metadata = GeopysparkCubeMetadata(
        {"cube:dimensions": {
            "t": {
                "type": "temporal"
            },
        }})

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        ref_path = str(self.temp_folder / "reproj_ref.tiff")
        imagecollection.reduce('max',
                               dimension="t").save_result(ref_path,
                                                          format="GTIFF")

        resampled = imagecollection.resample_spatial(resolution=0,
                                                     projection="EPSG:3395",
                                                     method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)
        path = str(self.temp_folder / "reprojected.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        with rasterio.open(ref_path) as ref_ds:
            with rasterio.open(path) as ds:
                print(ds.profile)
                #this reprojection does not change the shape, so we can compare
                assert ds.read().shape == ref_ds.read().shape

                assert (ds.crs.to_epsg() == 3395)

    def test_reduce(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("max"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("min"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("sum"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("mean"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        input = Pyramid({
            0:
            self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                5.0
            })
        })

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        stitched = cube.reduce_dimension(reducer=reducer("min"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("max"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("sum"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("mean"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        no_data = -1.0

        input = Pyramid({
            0:
            self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    5.0
                }, no_data)
        })

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        stitched = imagecollection.reduce(
            "min", dimension="t").pyramid.levels[0].stitch()
        #print(stitched)
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", dimension="t").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", dimension="t").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(5.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        with self.assertRaises(FeatureUnsupportedException) as context:
            imagecollection.reduce(
                "max", dimension="gender").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """
        Tests deprecated process spec! To be phased out.
        @return:
        """
        interval_list = ["2017-01-01", "2018-01-01"]
        self._test_aggregate_temporal(interval_list)

    def _median_reducer(self):
        from openeo.processes import median
        builder = median({"from_argument": "data"})
        return builder.flat_graph()

    def test_aggregate_temporal_median(self):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        stitched = (imagecollection.aggregate_temporal(
            ["2015-01-01", "2018-01-01"], ["2017-01-03"],
            self._median_reducer(),
            dimension="t").pyramid.levels[0].to_spatial_layer().stitch())
        print(stitched)
        expected_median = np.median(
            [self.tile.cells, self.tile2.cells, self.tile.cells], axis=0)
        #TODO nodata handling??
        assert_array_almost_equal(stitched.cells[0, 1:2, 1:2],
                                  expected_median[1:2, 1:2])

    def _test_aggregate_temporal(self, interval_list):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        stitched = (imagecollection.aggregate_temporal(
            interval_list, ["2017-01-03"], "min",
            dimension="t").pyramid.levels[0].to_spatial_layer().stitch())
        print(stitched)
        expected_max = np.min([self.tile2.cells, self.tile.cells], axis=0)
        assert_array_almost_equal(stitched.cells[0, 0:5, 0:5], expected_max)

    def test_aggregate_temporal_100(self):
        self._test_aggregate_temporal([["2017-01-01", "2018-01-01"]])

    def test_max_aggregator(self):
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        layer = imagecollection.reduce('max', dimension='t').pyramid.levels[0]
        stitched = layer.stitch()
        assert CellType.FLOAT32.value == layer.layer_metadata.cell_type
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        min_time = cube.reduce_dimension(reducer=reducer('min'),
                                         dimension='t',
                                         env=env)
        max_time = cube.reduce_dimension(reducer=reducer('max'),
                                         dimension='t',
                                         env=env)

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(cube.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_dimension_spatiotemporal(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(
            pyramid=input,
            metadata=GeopysparkCubeMetadata({
                "cube:dimensions": {
                    # TODO: also specify other dimensions?
                    "bands": {
                        "type": "bands",
                        "values": ["2"]
                    }
                },
                "summaries": {
                    "eo:bands": [{
                        "name": "2",
                        "common_name": "blue",
                        "wavelength_nm": 496.6,
                        "res_m": 10,
                        "scale": 0.0001,
                        "offset": 0,
                        "type": "int16",
                        "unit": "1"
                    }]
                }
            }))

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEqual(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster_replacement_default_none(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert np.isnan(stitched.cells[0][0][1])

    def test_mask_raster_replacement_float(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube, replacement=10.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_mask_raster_replacement_int(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube, replacement=10).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_apply_kernel_float(self):
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=input,
                                 metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel, 2.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 12.0
        assert stitched.cells[0][0][1] == 16.0
        assert stitched.cells[0][1][1] == 20.0

    def test_apply_kernel_int(self):
        kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=input,
                                 metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 6.0
        assert stitched.cells[0][0][1] == 8.0
        assert stitched.cells[0][1][1] == 10.0

    def test_resample_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)

    def test_rename_dimension(self):
        imagecollection = GeopysparkDataCube(pyramid=Pyramid(
            {0: self.tiled_raster_rdd}),
                                             metadata=self.collection_metadata)

        dim_renamed = imagecollection.rename_dimension('t', 'myNewTimeDim')

        dim_renamed.metadata.assert_valid_dimension('myNewTimeDim')
Example #10
class ToSpatialLayerTest(BaseTestClass):
    band_1 = np.array([
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0]])

    band_2 = np.array([
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0]])

    bands = np.array([band_1, band_2])
    time = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time), Tile(bands, 'FLOAT', -1.0)),
             (SpaceTimeKey(1, 0, time), Tile(bands, 'FLOAT', -1.0)),
             (SpaceTimeKey(0, 1, time), Tile(bands, 'FLOAT', -1.0)),
             (SpaceTimeKey(1, 1, time), Tile(bands, 'FLOAT', -1.0))]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0, 'instant': 1},
                    'maxKey': {'col': 1, 'row': 1, 'instant': 1}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2}}}

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time), Tile(bands, 'FLOAT', -1.0)),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time), Tile(bands, 'FLOAT', -1.0)),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time), Tile(bands, 'FLOAT', -1.0)),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time), Tile(bands, 'FLOAT', -1.0))]
    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    # This test should be moved to a more appropriate file once more spatial-temporal
    # tests are made.
    def test_spatial_metadata(self):
        metadata = self.raster_rdd.collect_metadata()
        min_key = metadata.bounds.minKey
        max_key = metadata.bounds.maxKey

        self.assertEqual(min_key.instant, self.time)
        self.assertEqual(max_key.instant, self.time)

    def test_to_spatial_raster_layer(self):
        actual = [k for k, v in self.raster_rdd.to_spatial_layer().to_numpy_rdd().collect()]

        expected = [
            ProjectedExtent(Extent(0, 0, 1, 1), 3857),
            ProjectedExtent(Extent(1, 0, 2, 1), 3857),
            ProjectedExtent(Extent(0, 1, 1, 2), 3857),
            ProjectedExtent(Extent(1, 1, 2, 2), 3857)
        ]

        for a, e in zip(actual, expected):
            self.assertEqual(a, e)

    def test_to_spatial_tiled_layer(self):
        actual = [k for k, v in self.tiled_raster_rdd.to_spatial_layer().to_numpy_rdd().collect()]

        expected = [
            SpatialKey(0, 0),
            SpatialKey(1, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 1)
        ]

        for a, e in zip(actual, expected):
            self.assertEqual(a, e)
Example #11
class ToSpatialLayerTest(BaseTestClass):
    band_1 = np.array([[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0]])

    band_2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                       [2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                       [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile_1 = Tile.from_numpy_array(np.array([band_1]))
    tile_2 = Tile.from_numpy_array(np.array([band_2]))
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile_1),
             (SpaceTimeKey(1, 0, time_1), tile_1),
             (SpaceTimeKey(0, 1, time_1), tile_1),
             (SpaceTimeKey(1, 1, time_1), tile_1),
             (SpaceTimeKey(0, 0, time_2), tile_2),
             (SpaceTimeKey(1, 0, time_2), tile_2),
             (SpaceTimeKey(0, 1, time_2), tile_2),
             (SpaceTimeKey(1, 1, time_2), tile_2)]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
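    # The bounds instants below use the unix-time encoding produced by
    # _convert_to_unix_time, spanning time_1 (minKey) through time_2 (maxKey)
    # so that every SpaceTimeKey in `layer` falls inside the key bounds.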
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_2)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile_1),
              (TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile_2),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile_2)]

    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    # This test should be moved to a more appropriate file once more
    # spatial-temporal tests have been added.
    def test_spatial_metadata(self):
        metadata = self.raster_rdd.collect_metadata()
        min_key = metadata.bounds.minKey
        max_key = metadata.bounds.maxKey

        self.assertEqual(min_key.instant, self.time_1)
        self.assertEqual(max_key.instant, self.time_2)

    def test_to_spatial_raster_layer(self):
        actual = self.raster_rdd.to_spatial_layer().to_numpy_rdd().keys().collect()

        expected = [
            ProjectedExtent(Extent(0, 0, 1, 1), 3857),
            ProjectedExtent(Extent(1, 0, 2, 1), 3857),
            ProjectedExtent(Extent(0, 1, 1, 2), 3857),
            ProjectedExtent(Extent(1, 1, 2, 2), 3857)
        ]

        for x in actual:
            self.assertTrue(x in expected)

    def test_to_spatial_target_time_raster_layer(self):
        converted = self.raster_rdd.to_spatial_layer(target_time=self.time_1)
        keys = converted.to_numpy_rdd().keys().collect()
        values = converted.to_numpy_rdd().values().collect()

        expected = [
            ProjectedExtent(Extent(0, 0, 1, 1), 3857),
            ProjectedExtent(Extent(1, 0, 2, 1), 3857),
            ProjectedExtent(Extent(0, 1, 1, 2), 3857),
            ProjectedExtent(Extent(1, 1, 2, 2), 3857)
        ]

        for x in keys:
            self.assertTrue(x in expected)

        for x in values:
            self.assertEqual(x.cells.shape, self.tile_1.cells.shape)
            self.assertTrue((x.cells == 1.0).all())

    def test_to_spatial_tiled_layer(self):
        actual = self.tiled_raster_rdd.to_spatial_layer().to_numpy_rdd().keys().collect()

        expected = [
            SpatialKey(0, 0),
            SpatialKey(1, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 1)
        ]

        for x in actual:
            self.assertTrue(x in expected)

    def test_to_spatial_target_time_tiled_layer(self):
        converted = self.tiled_raster_rdd.to_spatial_layer(target_time=self.time_2)
        keys = converted.to_numpy_rdd().keys().collect()
        values = converted.to_numpy_rdd().values().collect()

        expected = [
            SpatialKey(0, 0),
            SpatialKey(1, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 1)
        ]

        for x in keys:
            self.assertTrue(x in expected)

        for x in values:
            self.assertEqual(x.cells.shape, self.tile_2.cells.shape)
            self.assertTrue((x.cells == 2.0).all())
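The target_time variants are the interesting part of this example: passing target_time keeps only the records whose instant matches before the temporal component is dropped. Below is a minimal, hypothetical sketch of that behaviour, assuming an existing SparkContext (BaseTestClass.pysc in these tests) and the tile_1, tile_2, time_1 and time_2 values defined in the class above.

def tiles_at(pysc, target):
    pairs = [(TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_1), tile_1),
             (TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_2), tile_2)]
    layer = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, pysc.parallelize(pairs))
    # Records whose instant does not equal `target` are dropped, then the
    # temporal component is removed from the surviving keys.
    return layer.to_spatial_layer(target_time=target).to_numpy_rdd().values().collect()

# tiles_at(BaseTestClass.pysc, time_1) returns a single tile of 1.0s;
# tiles_at(BaseTestClass.pysc, time_2) returns a single tile of 2.0s
# (mirroring test_to_spatial_target_time_raster_layer above).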
Example #12
0
class FilterByTimesTest(BaseTestClass):
    band = np.array([[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                     [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                     [1.0, 1.0, 1.0, 1.0, 1.0]])

    tile = Tile.from_numpy_array(band)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile),
             (SpaceTimeKey(1, 0, time_2), tile),
             (SpaceTimeKey(0, 1, time_2), tile),
             (SpaceTimeKey(1, 1, time_2), tile),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_filter_temporal_projected_extent_single_time(self):
        result = self.raster_rdd.filter_by_times([self.time_1])
        expected = self.layer2[:4]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_temporal_projected_extent_multi_intervals(self):
        result = self.raster_rdd.filter_by_times([self.time_2, self.time_3])
        expected = self.layer2[4:]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_spacetime_key_single_time(self):
        result = self.tiled_raster_rdd.filter_by_times([self.time_3])
        expected = self.layer[8:]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())

    def test_filter_spacetime_key_multi_intervals(self):
        result = self.tiled_raster_rdd.filter_by_times(
            [self.time_1, self.time_2])
        expected = self.layer[:8]
        actual = result.to_numpy_rdd().collect()

        self.assertEqual(len(expected), len(actual))

        for x, y in zip(expected, actual):
            self.assertEqual(x[0], y[0])
            self.assertTrue((x[1].cells == y[1].cells).all())
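A compact, hypothetical sketch of what filter_by_times is being asked to do above: with a single instant it keeps only the records at that instant, and with two instants (as in the *_multi_intervals tests) the tests expect everything from the first through the second to be kept. It assumes an existing SparkContext (BaseTestClass.pysc in these tests) and the tile, time_1, time_2 and time_3 values defined in the class above.

def kept_instants(pysc, times):
    pairs = [(TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_1), tile),
             (TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_2), tile),
             (TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_3), tile)]
    layer = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, pysc.parallelize(pairs))
    # Only records at the requested time(s) survive the filter.
    kept = layer.filter_by_times(times).to_numpy_rdd().collect()
    return sorted(k.instant for k, _ in kept)

# kept_instants(BaseTestClass.pysc, [time_1])         -> [time_1]
# kept_instants(BaseTestClass.pysc, [time_2, time_3]) -> [time_2, time_3]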
Example #13
0
def temporal_projected_extent_decoder(schema_dict):
    return TemporalProjectedExtent(Extent(**schema_dict['extent']),
                                   schema_dict['instant'],
                                   schema_dict.get('epsg'),
                                   schema_dict.get('proj4'))
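For context, a hypothetical call to the decoder with a hand-built schema dict. The exact representation of 'instant' handed in by the ProtoBuf layer is not shown in this example, so a plain datetime is used purely for illustration.

sample = {
    'extent': {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0},
    'instant': datetime.datetime(2016, 8, 24, 9, 0, 0),
    'epsg': 3857,
    # 'proj4' omitted; schema_dict.get('proj4') falls back to None
}

tpe = temporal_projected_extent_decoder(sample)
# tpe.extent == Extent(0.0, 0.0, 1.0, 1.0), tpe.epsg == 3857, tpe.proj4 is None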