예제 #1
0
class CellValueCountsTest(BaseTestClass):
    pysc = BaseTestClass.pysc

    cells = np.array([[
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [3.0, 3.0, 3.0, 3.0, 3.0],
        [4.0, 4.0, 4.0, 4.0, 4.0],
        [5.0, 5.0, 5.0, 5.0, 5.0]]])

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(cells, 'FLOAT', -1.0,)),
             (SpatialKey(0, 1), Tile(cells, 'FLOAT', -1.0,)),
             (SpatialKey(1, 1), Tile(cells, 'FLOAT', -1.0,))]

    rdd = pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0},
                    'maxKey': {'col': 1, 'row': 1}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': layout}}

    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, metadata)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_counts_with_no_area(self):
        actual = self.raster_rdd.get_cell_value_counts()
        expected = {
            1: 20,
            2: 20,
            3: 20,
            4: 20,
            5: 20
        }

        self.assertDictEqual(actual, expected)

    def test_counts_with_polygon(self):
        area_of_interest = box(0.0, 0.0, 2.0, 2.0)
        actual = self.raster_rdd.get_cell_value_counts(area_of_interest)
        expected = {
            1: 5,
            2: 5,
            3: 5,
            4: 5,
            5: 5
        }

        self.assertDictEqual(actual, expected)
예제 #2
0
    def create_spatial_layer(self):
        cells = np.array([self.first, self.second], dtype='int')
        tile = Tile.from_numpy_array(cells, -1)

        layer = [(SpatialKey(0, 0), tile), (SpatialKey(1, 0), tile),
                 (SpatialKey(0, 1), tile), (SpatialKey(1, 1), tile)]

        rdd = BaseTestClass.pysc.parallelize(layer)

        metadata = {
            'cellType': 'int32ud-1',
            'extent': self.extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0
                },
                'maxKey': {
                    'col': 1,
                    'row': 1
                }
            },
            'layoutDefinition': {
                'extent': self.extent,
                'tileLayout': self.layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                               metadata)
예제 #3
0
def _create_spacetime_layer(cells: np.ndarray = None) -> TiledRasterLayer:
    # TODO all these "create_spacetime_layer" functions are duplicated across all tests
    #       and better should be moved to some kind of general factory or test fixture
    assert len(cells.shape) == 4
    tile = Tile.from_numpy_array(cells, -1)

    layer = [(SpaceTimeKey(0, 0, now), tile), (SpaceTimeKey(1, 0, now), tile),
             (SpaceTimeKey(0, 1, now), tile), (SpaceTimeKey(1, 1, now), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    metadata = {
        'cellType': 'int32ud-1',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(now)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(now)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': layout
        }
    }

    return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
예제 #4
0
    def create_spacetime_layer(self):
        cells = np.array([self.first, self.second], dtype='int')
        tile = Tile.from_numpy_array(cells, -1)

        layer = [(SpaceTimeKey(0, 0, self.now), tile),
                 (SpaceTimeKey(1, 0, self.now), tile),
                 (SpaceTimeKey(0, 1, self.now), tile),
                 (SpaceTimeKey(1, 1, self.now), tile)]

        rdd = SparkContext.getOrCreate().parallelize(layer)

        metadata = {'cellType': 'int32ud-1',
                    'extent': self.extent,
                    'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                    'bounds': {
                        'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(self.now)},
                        'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(self.now)}
                    },
                    'layoutDefinition': {
                        'extent': self.extent,
                        'tileLayout': self.layout
                    }
                    }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
예제 #5
0
    def test_space_time_keys(self):
        temp_keys = [
            SpaceTimeKey(0, 0, instant=self.time),
            SpaceTimeKey(0, 1, instant=self.time)
        ]

        temp_key_layer = [(temp_keys[0], self.tile_2),
                          (temp_keys[1], self.tile_2),
                          (temp_keys[0], self.tile_2),
                          (temp_keys[1], self.tile_2)]

        temp_bounds = Bounds(temp_keys[0], temp_keys[1])

        temp_md = Metadata(bounds=temp_bounds,
                           crs=self.md_proj,
                           cell_type=self.ct,
                           extent=self.extent,
                           layout_definition=self.ld)

        rdd = self.pysc.parallelize(temp_key_layer)
        layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                                temp_md)

        actual = layer.merge()

        self.assertEqual(actual.srdd.rdd().count(), 2)

        for k, v in actual.to_numpy_rdd().collect():
            self.assertTrue((v.cells == self.arr_2).all())
예제 #6
0
    def test_spatial_keys(self):
        keys = [
            SpatialKey(0, 0),
            SpatialKey(0, 1),
            SpatialKey(1, 0),
            SpatialKey(1, 1)
        ]

        key_layer = [(keys[0], self.tile), (keys[1], self.tile),
                     (keys[2], self.tile), (keys[3], self.tile)]

        bounds = Bounds(keys[0], keys[3])

        md = Metadata(bounds=bounds,
                      crs=self.md_proj,
                      cell_type=self.ct,
                      extent=self.extent,
                      layout_definition=self.ld)

        rdd = self.pysc.parallelize(key_layer)
        layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, md)

        actual = layer.collect_keys()

        for x in actual:
            self.assertTrue(x in keys)
예제 #7
0
    def test_space_time_keys(self):
        temp_keys = [
            SpaceTimeKey(0, 0, instant=self.time),
            SpaceTimeKey(0, 1, instant=self.time),
            SpaceTimeKey(1, 0, instant=self.time),
            SpaceTimeKey(1, 1, instant=self.time)
        ]

        temp_key_layer = [(temp_keys[0], self.tile), (temp_keys[1], self.tile),
                          (temp_keys[2], self.tile), (temp_keys[3], self.tile)]

        temp_bounds = Bounds(temp_keys[0], temp_keys[3])

        temp_md = Metadata(bounds=temp_bounds,
                           crs=self.md_proj,
                           cell_type=self.ct,
                           extent=self.extent,
                           layout_definition=self.ld)

        rdd = self.pysc.parallelize(temp_key_layer)
        layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                                temp_md)

        actual = layer.collect_keys()

        for x in actual:
            self.assertTrue(x in temp_keys)
예제 #8
0
def imagecollection_with_two_bands_and_three_dates_webmerc(request):
    from geopyspark.geotrellis import (SpaceTimeKey, Tile, _convert_to_unix_time)
    from geopyspark.geotrellis.constants import LayerType
    from geopyspark.geotrellis.layer import TiledRasterLayer
    import geopyspark as gps

    from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube,GeopysparkCubeMetadata

    date1, date3, rdd = numpy_rdd_two_bands_and_three_dates()

    metadata = {'cellType': 'int32ud-1',
                'extent': extent_webmerc,
                'crs': '+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(date1)},
                    'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(date3)}
                },
                'layoutDefinition': {
                    'extent': extent_webmerc,
                    'tileLayout': layout
                }
                }

    geopyspark_layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    datacube = GeopysparkDataCube(pyramid=gps.Pyramid({0: geopyspark_layer}), metadata=GeopysparkCubeMetadata(openeo_metadata))
    if request.instance:
        request.instance.imagecollection_with_two_bands_and_three_dates = datacube
    return datacube
예제 #9
0
    def test_bin_counts(self):
        metadata2 = {'cellType': 'int32ud-500',
                     'extent': self.extent,
                     'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                     'bounds': {
                         'minKey': {'col': 0, 'row': 0},
                         'maxKey': {'col': 0, 'row': 0}},
                     'layoutDefinition': {
                         'extent': self.extent,
                         'tileLayout': {'tileCols': 4, 'tileRows': 4, 'layoutCols': 1, 'layoutRows': 1}}}

        arr2 = np.int8([[[1, 1, 1, 1],
                         [3, 1, 1, 1],
                         [4, 3, 1, 1],
                         [5, 4, 3, 1]]])

        tile2 = Tile(arr2, 'INT', -500)
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2,
                                                 metadata2)

        hist2 = tiled2.get_class_histogram()
        bin_counts = hist2.bin_counts()

        self.assertEqual(bin_counts, [(1, 10), (3, 3), (4, 2), (5, 1)])
예제 #10
0
class StitchTest(BaseTestClass):
    cells = np.array([[[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 0.0]]])

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(0, 1), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(1, 1), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             ))]
    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0
            },
            'maxKey': {
                'col': 1,
                'row': 1
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                 metadata)

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_stitch(self):
        result = self.raster_rdd.stitch()
        self.assertTrue(result.cells.shape == (1, 10, 10))
예제 #11
0
class HillshadeTest(BaseTestClass):
    cells = np.array([[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 3.0, 3.0, 2.0, 1.0, -1.0],
                       [1.0, 1.0, 3.0, 2.0, 2.0, 2.0],
                       [1.0, 2.0, 2.0, 2.0, 2.0, 2.0],
                       [1.0, 1.0, 1.0, 2.0, 2.0, 2.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0, 2.0]]])

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0))]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 6, 'tileRows': 6}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0
            },
            'maxKey': {
                'col': 0,
                'row': 0
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 6,
                'tileRows': 6,
                'layoutCols': 1,
                'layoutRows': 1
            }
        }
    }

    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                 metadata)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_hillshade(self):
        calc = zfactor_lat_lng_calculator(Unit.METERS)
        result = hillshade(self.raster_rdd,
                           calc,
                           band=0,
                           azimuth=99.0,
                           altitude=33.0)

        data = result.to_numpy_rdd().first()[1].cells[0][0][0]
        self.assertEqual(data, 63)
예제 #12
0
    def test_add_int(self):
        arr = np.zeros((1, 4, 4))

        tile = Tile(arr, 'FLOAT', -500)
        rdd = BaseTestClass.pysc.parallelize([(self.spatial_key, tile)])
        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, self.metadata)

        result = tiled + 1
        actual = result.to_numpy_rdd().first()[1].cells

        self.assertTrue((actual == 1).all())
예제 #13
0
    def test_rpow_double(self):
        arr = np.full((1, 4, 4), 3.0, dtype='int64')

        tile = Tile(arr, 'FLOAT', -500)
        rdd = BaseTestClass.pysc.parallelize([(self.spatial_key, tile)])
        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, self.metadata)

        result = 0.0 ** tiled
        actual = result.to_numpy_rdd().first()[1].cells

        self.assertTrue((actual == 0.0).all())
예제 #14
0
class MaskTest(BaseTestClass):
    pysc = BaseTestClass.pysc
    cells = np.zeros((1, 2, 2))
    cells.fill(1)

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(cells, 'FLOAT', -1.0,)),
             (SpatialKey(0, 1), Tile(cells, 'FLOAT', -1.0,)),
             (SpatialKey(1, 1), Tile(cells, 'FLOAT', -1.0,))]

    rdd = pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 2, 'tileRows': 2}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': 4326,
                'bounds': {
                    'minKey': {'col': 0, 'row': 0},
                    'maxKey': {'col': 1, 'row': 1}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': layout}}

    geoms = [box(0.0, 0.0, 2.0, 2.0), box(3.0, 3.0, 4.0, 4.0)]
    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, Metadata.from_dict(metadata))

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_geotrellis_mask(self):
        result = self.raster_rdd.mask(geometries=self.geoms).to_numpy_rdd()
        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)

        self.assertEqual(n, 2.0)

    def test_rdd_mask_no_partition_strategy(self):
        rdd = BaseTestClass.pysc.parallelize(self.geoms)

        result = self.raster_rdd.mask(rdd, options=RasterizerOptions(True, 'PixelIsArea')).to_numpy_rdd()
        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)

        self.assertEqual(n, 2.0)

    def test_rdd_mask_with_partition_strategy(self):
        rdd = BaseTestClass.pysc.parallelize(self.geoms)

        result = self.raster_rdd.mask(rdd, partition_strategy=SpatialPartitionStrategy()).to_numpy_rdd()
        n = result.map(lambda kv: np.sum(kv[1].cells)).reduce(lambda a, b: a + b)

        self.assertEqual(n, 2.0)
예제 #15
0
    def test_mode(self):
        arr2 = np.array([[[1.0, 1.0, 1.0, 1.0],
                          [2.0, 2.0, 2.0, 2.0],
                          [1.0, 3.0, 3.0, 3.0],
                          [4.0, 4.0, 4.0, 4.0]]], dtype=float)

        tile2 = Tile(arr2, 'FLOAT', -500)
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2, self.metadata)
        hist2 = tiled2.get_histogram()

        self.assertEqual(hist2.mode(), 1.0)
예제 #16
0
    def _create_spacetime_layer(self, no_data):
        def tile(value):
            cells = np.zeros((4, 4), dtype=float)
            cells.fill(value)
            return Tile.from_numpy_array(cells, no_data)

        tiles = [(SpaceTimeKey(0, 0, self.now), tile(0)),
                 (SpaceTimeKey(1, 0, self.now), tile(1)),
                 (SpaceTimeKey(0, 1, self.now), tile(2)),
                 (SpaceTimeKey(1, 1, self.now), tile(no_data))]

        for tile in tiles:
            print(tile)

        layout = {
            'layoutCols': 2,
            'layoutRows': 2,
            'tileCols': 4,
            'tileRows': 4
        }
        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 8.0, 'ymax': 8.0}

        rdd = SparkContext.getOrCreate().parallelize(tiles)
        print(rdd.count())

        metadata = {
            'cellType': 'float64ud-1',
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(self.now)
                },
                'maxKey': {
                    'col': 1,
                    'row': 1,
                    'instant': _convert_to_unix_time(self.now)
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)
예제 #17
0
    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)
예제 #18
0
    def test_combined_operations(self):
        arr = np.array([[[10, 10, 10, 10],
                         [20, 20, 20, 20],
                         [10, 10, 10, 10],
                         [20, 20, 20, 20]]], dtype=int)

        tile = Tile(arr, 'INT', -500)
        rdd = BaseTestClass.pysc.parallelize([(self.spatial_key, tile)])

        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, self.metadata)

        result = (tiled + tiled) / 2
        actual = result.to_numpy_rdd().first()[1].cells

        self.assertTrue((actual == arr).all())
예제 #19
0
    def test_divide_tiled_rdd(self):
        arr = np.array([[[5.0, 5.0, 5.0, 5.0],
                         [5.0, 5.0, 5.0, 5.0],
                         [5.0, 5.0, 5.0, 5.0],
                         [5.0, 5.0, 5.0, 5.0]]], dtype=float)

        divider = np.array([[[1.0, 1.0, 1.0, 1.0],
                             [1.0, 1.0, 1.0, 1.0],
                             [1.0, 1.0, 1.0, 1.0],
                             [1.0, 1.0, 1.0, 1.0]]], dtype=float)

        tile = Tile(arr, 'FLOAT', float('nan'))
        tile2 = Tile(divider, 'FLOAT', float('nan'))

        rdd = BaseTestClass.pysc.parallelize([(self.spatial_key, tile)])
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])

        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, self.metadata)
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2, self.metadata)

        result = tiled / tiled2
        actual = result.to_numpy_rdd().first()[1].cells

        self.assertTrue((actual == 5.0).all())
예제 #20
0
    def test_from_histogram(self):
        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 2,
            'tileRows': 4
        }
        metadata = {
            'cellType': 'float32ud-1.0',
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0
                },
                'maxKey': {
                    'col': 0,
                    'row': 0
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': {
                    'tileCols': 2,
                    'tileRows': 4,
                    'layoutCols': 1,
                    'layoutRows': 1
                }
            }
        }
        spatial_key = SpatialKey(0, 0)
        arr = np.array([[[1.0, 5.0, 2.0, 3.0], [4.0, 6.0, 7.0, 0.0]]],
                       dtype=float)
        tile = Tile(arr, 'FLOAT', -500)
        rdd = BaseTestClass.pysc.parallelize([(spatial_key, tile)])
        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                metadata)
        hist = tiled.get_histogram()

        color_list = self.color_list
        result = ColorMap.from_histogram(hist, color_list)
        self.assertTrue(isinstance(result, ColorMap))
예제 #21
0
    def test_multiply_double(self):
        arr = np.array([[[1.0, 1.0, 1.0, 1.0],
                         [2.0, 2.0, 2.0, 2.0],
                         [3.0, 3.0, 3.0, 3.0],
                         [4.0, 4.0, 4.0, 4.0]]], dtype=float)

        tile = Tile(arr, 'FLOAT', float('nan'))
        rdd = BaseTestClass.pysc.parallelize([(self.spatial_key, tile)])
        tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, self.metadata)

        result = 5.0 * tiled
        actual = result.to_numpy_rdd().first()[1].cells

        expected = np.array([[[5.0, 5.0, 5.0, 5.0],
                              [10.0, 10.0, 10.0, 10.0],
                              [15.0, 15.0, 15.0, 15.0],
                              [20.0, 20.0, 20.0, 20.0]]], dtype=float)

        self.assertTrue((actual == expected).all())
예제 #22
0
def imagecollection_with_two_bands_and_one_date(request):
    import geopyspark as gps
    from geopyspark.geotrellis import (SpaceTimeKey, Tile, _convert_to_unix_time)
    from geopyspark.geotrellis.constants import LayerType
    from geopyspark.geotrellis.layer import TiledRasterLayer
    from pyspark import SparkContext

    from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube

    print(request)
    two_band_one_two = np.array([matrix_of_one, matrix_of_two], dtype='int')
    tile = Tile.from_numpy_array(two_band_one_two, -1)

    date1 = datetime.datetime.strptime("2017-09-25T11:37:00Z", '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=pytz.UTC)


    layer = [(SpaceTimeKey(0, 0, date1), tile),
             (SpaceTimeKey(1, 0, date1), tile),
             (SpaceTimeKey(0, 1, date1), tile),
             (SpaceTimeKey(1, 1, date1), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    metadata = {'cellType': 'int32ud-1',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(date1)},
                    'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(date1)}
                },
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': layout
                }
                }

    geopyspark_layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    datacube = GeopysparkDataCube(pyramid=gps.Pyramid({0: geopyspark_layer}), metadata=openeo_metadata)

    if request.instance:
        request.instance.imagecollection_with_two_bands_and_one_date = datacube
    return datacube
예제 #23
0
    def create_spacetime_unsigned_byte_layer(self):
        """
        Returns a single-band uint8ud255 layer consisting of four tiles that each look like this:

         ND 220 220 220
        220 220 220 220
        220 220 220 220
        220 220 220 220

        The extent is (0.0, 0.0) to (4.0, 4.0).
        """
        no_data = 255

        single_band = np.zeros((1, 4, 4))
        single_band.fill(220)
        single_band[0, 0, 0] = no_data

        cells = np.array([single_band], dtype='uint8')
        tile = Tile.from_numpy_array(cells, no_data)

        layer = [(SpaceTimeKey(0, 0, self.now), tile),
                 (SpaceTimeKey(1, 0, self.now), tile),
                 (SpaceTimeKey(0, 1, self.now), tile),
                 (SpaceTimeKey(1, 1, self.now), tile)]

        rdd = SparkContext.getOrCreate().parallelize(layer)

        metadata = {
            'cellType': 'uint8ud255',
            'extent': self.extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(self.now)},
                'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(self.now)}
            },
            'layoutDefinition': {
                'extent': self.extent,
                'tileLayout': self.layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)
예제 #24
0
class SlopeTest(BaseTestClass):
    cells = np.array([[
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 1.0, 0.0],
        [0.0, 0.0, 1.0, 1.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0]]])

    tile = Tile.from_numpy_array(cells, -1.0)

    layer = [(SpatialKey(0, 0), tile),
             (SpatialKey(1, 0), tile),
             (SpatialKey(0, 1), tile),
             (SpatialKey(1, 1), tile)]
    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0},
                    'maxKey': {'col': 1, 'row': 1}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2}}}

    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, metadata)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_slope(self):

        calc = zfactor_lat_lng_calculator(Unit.METERS)
        result = self.raster_rdd.slope(calc).to_numpy_rdd().values().first().cells

        self.assertEqual(result[0, 0, 0], 0.0)
예제 #25
0
    def test_spatial_keys(self):
        keys = [SpatialKey(0, 0), SpatialKey(0, 1)]

        key_layer = [(keys[0], self.tile_1), (keys[1], self.tile_1),
                     (keys[0], self.tile_2), (keys[1], self.tile_2)]

        bounds = Bounds(keys[0], keys[1])

        md = Metadata(bounds=bounds,
                      crs=self.md_proj,
                      cell_type=self.ct,
                      extent=self.extent,
                      layout_definition=self.ld)

        rdd = self.pysc.parallelize(key_layer)
        layer = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, md)

        actual = layer.merge()

        self.assertEqual(actual.srdd.rdd().count(), 2)

        for k, v in actual.to_numpy_rdd().collect():
            self.assertTrue((v.cells == self.arr_2).all())
예제 #26
0
class TestMultipleDates(TestCase):
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }
    collection_metadata = GeopysparkCubeMetadata(
        {"cube:dimensions": {
            "t": {
                "type": "temporal"
            },
        }})

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        ref_path = str(self.temp_folder / "reproj_ref.tiff")
        imagecollection.reduce('max',
                               dimension="t").save_result(ref_path,
                                                          format="GTIFF")

        resampled = imagecollection.resample_spatial(resolution=0,
                                                     projection="EPSG:3395",
                                                     method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)
        path = str(self.temp_folder / "reprojected.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        with rasterio.open(ref_path) as ref_ds:
            with rasterio.open(path) as ds:
                print(ds.profile)
                #this reprojection does not change the shape, so we can compare
                assert ds.read().shape == ref_ds.read().shape

                assert (ds.crs.to_epsg() == 3395)

    def test_reduce(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("max"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("min"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("sum"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("mean"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        input = Pyramid({
            0:
            self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                5.0
            })
        })

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        stitched = cube.reduce_dimension(reducer=reducer("min"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("max"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("sum"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("mean"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        no_data = -1.0

        input = Pyramid({
            0:
            self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    5.0
                }, no_data)
        })

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        stitched = imagecollection.reduce(
            "min", dimension="t").pyramid.levels[0].stitch()
        #print(stitched)
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", dimension="t").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", dimension="t").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(5.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", dimension="t").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        with self.assertRaises(FeatureUnsupportedException) as context:
            imagecollection.reduce(
                "max", dimension="gender").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """
        Tests deprecated process spec! To be phased out.
        @return:
        """
        interval_list = ["2017-01-01", "2018-01-01"]
        self._test_aggregate_temporal(interval_list)

    def _median_reducer(self):
        from openeo.processes import median
        builder = median({"from_argument": "data"})
        return builder.flat_graph()

    def test_aggregate_temporal_median(self):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        stitched = (imagecollection.aggregate_temporal(
            ["2015-01-01", "2018-01-01"], ["2017-01-03"],
            self._median_reducer(),
            dimension="t").pyramid.levels[0].to_spatial_layer().stitch())
        print(stitched)
        expected_median = np.median(
            [self.tile.cells, self.tile2.cells, self.tile.cells], axis=0)
        #TODO nodata handling??
        assert_array_almost_equal(stitched.cells[0, 1:2, 1:2],
                                  expected_median[1:2, 1:2])

    def _test_aggregate_temporal(self, interval_list):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)
        stitched = (imagecollection.aggregate_temporal(
            interval_list, ["2017-01-03"], "min",
            dimension="t").pyramid.levels[0].to_spatial_layer().stitch())
        print(stitched)
        expected_max = np.min([self.tile2.cells, self.tile.cells], axis=0)
        assert_array_almost_equal(stitched.cells[0, 0:5, 0:5], expected_max)

    def test_aggregate_temporal_100(self):
        self._test_aggregate_temporal([["2017-01-01", "2018-01-01"]])

    def test_max_aggregator(self):
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        layer = imagecollection.reduce('max', dimension='t').pyramid.levels[0]
        stitched = layer.stitch()
        assert CellType.FLOAT32.value == layer.layer_metadata.cell_type
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        min_time = cube.reduce_dimension(reducer=reducer('min'),
                                         dimension='t',
                                         env=env)
        max_time = cube.reduce_dimension(reducer=reducer('max'),
                                         dimension='t',
                                         env=env)

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEquals(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(cube.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_dimension_spatiotemporal(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(
            pyramid=input,
            metadata=GeopysparkCubeMetadata({
                "cube:dimensions": {
                    # TODO: also specify other dimensions?
                    "bands": {
                        "type": "bands",
                        "values": ["2"]
                    }
                },
                "summaries": {
                    "eo:bands": [{
                        "name": "2",
                        "common_name": "blue",
                        "wavelength_nm": 496.6,
                        "res_m": 10,
                        "scale": 0.0001,
                        "offset": 0,
                        "type": "int16",
                        "unit": "1"
                    }]
                }
            }))

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEquals(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster_replacement_default_none(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert np.isnan(stitched.cells[0][0][1])

    def test_mask_raster_replacement_float(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube, replacement=10.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_mask_raster_replacement_int(self):
        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        input = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask)
        stitched = cube.mask(mask=mask_cube, replacement=10).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_apply_kernel_float(self):
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=input,
                                 metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel, 2.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 12.0
        assert stitched.cells[0][0][1] == 16.0
        assert stitched.cells[0][1][1] == 20.0

    def test_apply_kernel_int(self):
        kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=input,
                                 metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 6.0
        assert stitched.cells[0][0][1] == 8.0
        assert stitched.cells[0][1][1] == 10.0

    def test_resample_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeopysparkDataCube(pyramid=input,
                                             metadata=self.collection_metadata)

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)

    def test_rename_dimension(self):
        imagecollection = GeopysparkDataCube(pyramid=Pyramid(
            {0: self.tiled_raster_rdd}),
                                             metadata=self.collection_metadata)

        dim_renamed = imagecollection.rename_dimension('t', 'myNewTimeDim')

        dim_renamed.metadata.assert_valid_dimension('myNewTimeDim')
예제 #27
0
class HistogramTest(BaseTestClass):
    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 4, 'tileRows': 4}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0},
                    'maxKey': {'col': 0, 'row': 0}},
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': {'tileCols': 4, 'tileRows': 4, 'layoutCols': 1, 'layoutRows': 1}}}

    spatial_key = SpatialKey(0, 0)

    arr = np.array([[[1.0, 1.0, 1.0, 1.0],
                     [2.0, 2.0, 2.0, 2.0],
                     [3.0, 3.0, 3.0, 3.0],
                     [4.0, 4.0, 4.0, 4.0]]], dtype=float)

    tile = Tile(arr, 'FLOAT', -500)
    rdd = BaseTestClass.pysc.parallelize([(spatial_key, tile)])
    tiled = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, metadata)
    hist = tiled.get_histogram()

    def test_min(self):
        self.assertEqual(self.hist.min(), 1.0)

    def test_max(self):
        self.assertEqual(self.hist.max(), 4.0)

    def test_min_max(self):
        self.assertEqual(self.hist.min_max(), (1.0, 4.0))

    def test_mean(self):
        self.assertEqual(self.hist.mean(), 2.5)

    def test_mode(self):
        arr2 = np.array([[[1.0, 1.0, 1.0, 1.0],
                          [2.0, 2.0, 2.0, 2.0],
                          [1.0, 3.0, 3.0, 3.0],
                          [4.0, 4.0, 4.0, 4.0]]], dtype=float)

        tile2 = Tile(arr2, 'FLOAT', -500)
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2, self.metadata)
        hist2 = tiled2.get_histogram()

        self.assertEqual(hist2.mode(), 1.0)

    def test_quantile_breaks(self):
        result = self.hist.quantile_breaks(4)
        self.assertEqual(result, [1, 2, 3, 4])

    def test_median(self):
        self.assertEqual(self.hist.median(), 2.5)

    def test_cdf(self):
        expected_cdf = [(1.0, 0.25), (2.0, 0.5), (3.0, 0.75), (4.0, 1.0)]

        self.assertEqual(self.hist.cdf(), expected_cdf)

    def test_bucket_count(self):
        self.assertEqual(self.hist.bucket_count(), 4)

    def test_values(self):
        self.assertEqual(self.hist.values(), [1.0, 2.0, 3.0, 4.0])

    def test_item_count(self):
        self.assertEqual(self.hist.item_count(3.0), 4)

    def test_bin_counts(self):
        metadata2 = {'cellType': 'int32ud-500',
                     'extent': self.extent,
                     'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                     'bounds': {
                         'minKey': {'col': 0, 'row': 0},
                         'maxKey': {'col': 0, 'row': 0}},
                     'layoutDefinition': {
                         'extent': self.extent,
                         'tileLayout': {'tileCols': 4, 'tileRows': 4, 'layoutCols': 1, 'layoutRows': 1}}}

        arr2 = np.int8([[[1, 1, 1, 1],
                         [3, 1, 1, 1],
                         [4, 3, 1, 1],
                         [5, 4, 3, 1]]])

        tile2 = Tile(arr2, 'INT', -500)
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2,
                                                 metadata2)

        hist2 = tiled2.get_class_histogram()
        bin_counts = hist2.bin_counts()

        self.assertEqual(bin_counts, [(1, 10), (3, 3), (4, 2), (5, 1)])

    def test_merge(self):
        arr2 = np.array([[[1.0, 1.0, 1.0, 1.0],
                          [1.0, 1.0, 1.0, 1.0],
                          [1.0, 1.0, 1.0, 1.0],
                          [1.0, 1.0, 1.0, 1.0]]], dtype=float)

        tile2 = Tile(arr2, 'FLOAT', -500)
        rdd2 = BaseTestClass.pysc.parallelize([(self.spatial_key, tile2)])
        tiled2 = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2,
                                                 self.metadata)

        hist2 = tiled2.get_histogram()

        merged = self.hist.merge(hist2)


        self.assertEqual(merged.values(), [1.0, 2.0, 3.0, 4.0])
        self.assertEqual(merged.mean(), 1.75)

    def test_dict_methods(self):
        dict_hist = self.hist.to_dict()
        # value produced by histogram of doubles
        self.assertEqual(dict_hist['maxBucketCount'], 80)

        rebuilt_hist = Histogram.from_dict(dict_hist)
        self.assertEqual(self.hist.min_max(), rebuilt_hist.min_max())
예제 #28
0
class PolygonalSummariesTest(BaseTestClass):
    cells_1 = np.array([[[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                         [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                         [1.0, 1.0, 1.0, 1.0, 0.0]]])

    cells_2 = np.array([[[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                         [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                         [1.0, 1.0, 1.0, 1.0, 0.0]]])

    cells = np.array([cells_1, cells_2])

    layer = [(SpatialKey(0, 0), Tile(cells, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(0, 1), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(1, 1), Tile(
                 cells,
                 'FLOAT',
                 -1.0,
             ))]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0
            },
            'maxKey': {
                'col': 1,
                'row': 1
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                metadata)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_polygonal_min(self):
        polygon = Polygon([(0.0, 0.0), (0.0, 33.0), (33.0, 33.0), (33.0, 0.0),
                           (0.0, 0.0)])
        result = self.tiled_rdd.polygonal_min(polygon, float)

        self.assertEqual(result, [0.0, 0.0])

    def test_polygonal_max(self):
        polygon = Polygon([(1.0, 1.0), (1.0, 10.0), (10.0, 10.0), (10.0, 1.0)])
        result = self.tiled_rdd.polygonal_max(polygon, float)

        self.assertEqual(result, [1.0, 1.0])

    def test_polygonal_sum(self):
        polygon = Polygon([(0.0, 0.0), (0.0, 33.0), (33.0, 33.0), (33.0, 0.0),
                           (0.0, 0.0)])
        result = self.tiled_rdd.polygonal_sum(polygon, float)

        self.assertEqual(result, [96.0, 96.0])

    def test_polygonal_mean(self):
        polygon = Polygon([(1.0, 1.0), (1.0, 10.0), (10.0, 10.0), (10.0, 1.0)])
        result = self.tiled_rdd.polygonal_mean(polygon)

        self.assertEqual(result, [1.0, 1.0])
예제 #29
0
class NormalizeTest(BaseTestClass):
    cells = np.array([[[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                       [1.0, 1.0, 1.0, 1.0, 0.0]]])

    layer = [(SpatialKey(0, 0), Tile(cells + 0, 'FLOAT', -1.0)),
             (SpatialKey(1, 0), Tile(
                 cells + 1,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(0, 1), Tile(
                 cells + 2,
                 'FLOAT',
                 -1.0,
             )), (SpatialKey(1, 1), Tile(
                 cells + 3,
                 'FLOAT',
                 -1.0,
             ))]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0
            },
            'maxKey': {
                'col': 1,
                'row': 1
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd,
                                                 metadata)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_normalize_all_parameters(self):
        normalized = self.raster_rdd.normalize(old_min=0.0,
                                               old_max=4.0,
                                               new_min=5.0,
                                               new_max=10.0)

        self.assertEqual(normalized.get_min_max(), (5.0, 10.0))

    def test_normalize_no_optinal_parameters(self):
        normalized = self.raster_rdd.normalize(new_min=5.0, new_max=10.0)

        self.assertEqual(normalized.get_min_max(), (5.0, 10.0))

    def test_normalize_old_min(self):
        normalized = self.raster_rdd.normalize(old_min=-1,
                                               new_min=5.0,
                                               new_max=10.0)

        self.assertEqual(normalized.get_min_max(), (6.0, 10.0))

    def test_normalize_old_max(self):
        normalized = self.raster_rdd.normalize(old_max=5.0,
                                               new_min=5.0,
                                               new_max=10.0)

        self.assertEqual(normalized.get_min_max(), (5.0, 9.0))
예제 #30
0
class TestMultipleDates(TestCase):
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z",
                                        '%Y-%m-%dT%H:%M:%SZ')

    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {
                'col': 0,
                'row': 0,
                'instant': _convert_to_unix_time(time_1)
            },
            'maxKey': {
                'col': 1,
                'row': 1,
                'instant': _convert_to_unix_time(time_3)
            }
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {
                'tileCols': 5,
                'tileRows': 5,
                'layoutCols': 2,
                'layoutRows': 2
            }
        }
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME,
                                                       rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2),
                                       epsg=3857,
                                       instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2),
                                       epsg=3857,
                                       instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def test_reproject_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0,
                                                     projection="EPSG:3857",
                                                     method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)
        path = str(self.temp_folder / "reprojected.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)

    def test_reduce(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        input = Pyramid({
            0:
            self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                5.0
            })
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        no_data = -1.0

        input = Pyramid({
            0:
            self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'):
                    5.0
                }, no_data)
        })

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            "min", "temporal").pyramid.levels[0].stitch()
        #print(stitched)
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "max", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "sum", "temporal").pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = imagecollection.reduce(
            "mean", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(5.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "variance", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

        stitched = imagecollection.reduce(
            "sd", "temporal").pyramid.levels[0].stitch()
        self.assertAlmostEqual(0.0, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"

        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(
            grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [
            elem(timestamp, value)
            for timestamp, value in sorted_by_datetime.items()
        ]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 1,
            'tileRows': 1
        }
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[0])
                },
                'maxKey': {
                    'col': 0,
                    'row': 0,
                    'instant': _convert_to_unix_time(datetimes[-1])
                }
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd,
                                               metadata)

    def test_reduce_nontemporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        with self.assertRaises(AttributeError) as context:
            imagecollection.reduce("max",
                                   "spectral").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.aggregate_temporal(
            ["2017-01-01", "2018-01-01"], ["2017-01-03"],
            "max").pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

    def test_max_aggregator(self):
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        stitched = imagecollection.reduce(
            'max', 'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        min_time = imagecollection.reduce('min', 'temporal')
        max_time = imagecollection.reduce('max', 'temporal')

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEquals(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(imagecollection.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_spatiotemporal(self):
        import openeo_udf.functions

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })
        import os, openeo_udf
        dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(dir, "datacube_reduce_time_sum.py")
        with open(file_name, "r") as f:
            udf_code = f.read()

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        stitched = result.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)
        self.assertEqual(2, stitched.cells[0][0][0])
        self.assertEqual(6, stitched.cells[0][0][5])
        self.assertEqual(4, stitched.cells[0][5][6])

    def test_apply_dimension_spatiotemporal(self):

        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry(), {
                "bands": [{
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }]
            })

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
        
        """

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEquals(len(TestMultipleDates.layer), len(local_tiles))
        ref_dict = {
            e[0]: e[1]
            for e in imagecollection.pyramid.levels[0].convert_data_type(
                CellType.FLOAT64).to_numpy_rdd().collect()
        }
        result_dict = {e[0]: e[1] for e in local_tiles}
        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells),
                                      np.squeeze(tile.cells),
                                      decimal=2)

    def test_mask_raster(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        def createMask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        mask_layer = self.tiled_raster_rdd.map_tiles(createMask)
        mask = Pyramid({0: mask_layer})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.mask(
            rastermask=GeotrellisTimeSeriesImageCollection(
                mask, InMemoryServiceRegistry()),
            replacement=10.0).reduce('max',
                                     'temporal').pyramid.levels[0].stitch()
        print(stitched)
        self.assertEquals(2.0, stitched.cells[0][0][0])
        self.assertEquals(10.0, stitched.cells[0][0][1])

    def test_apply_kernel(self):
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        input = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())
        stitched = imagecollection.apply_kernel(kernel, 2.0).reduce(
            'max', 'temporal').pyramid.levels[0].stitch()

        self.assertEquals(12.0, stitched.cells[0][0][0])
        self.assertEquals(16.0, stitched.cells[0][0][1])
        self.assertEquals(20.0, stitched.cells[0][1][1])

    def test_resample_spatial(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        imagecollection = GeotrellisTimeSeriesImageCollection(
            input, InMemoryServiceRegistry())

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        resampled.reduce('max',
                         'temporal').download(path,
                                              format="GTIFF",
                                              parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)