class TemporalProjectedExtentSchemaTest(BaseTestClass):
    """Checks the protobuf encoder/decoder pair for ``TemporalProjectedExtent``.

    The class body reads the RDD returned by the JVM-side
    ``TemporalProjectedExtentWrapper.testOut`` and deserializes it with a
    ``ProtoBufSerializer``. The tests compare that data against three
    hand-built extents that share EPSG code 2004 and a single instant.
    """

    extents = [
        Extent(0.0, 0.0, 1.0, 1.0),
        Extent(1.0, 2.0, 3.0, 4.0),
        Extent(5.0, 6.0, 7.0, 8.0),
    ]

    time = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    # Spelled out rather than built with a comprehension: a comprehension in a
    # class body cannot see the class-level names ``time`` and ``extents``.
    expected_tpextents = [
        TemporalProjectedExtent(epsg=2004, extent=extents[0], instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[1], instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[2], instant=time)._asdict()
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.TemporalProjectedExtentWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(temporal_projected_extent_decoder,
                             temporal_projected_extent_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = [tpex._asdict() for tpex in rdd.collect()]

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        """Close the py4j gateway once the tests of this class have run."""
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_tpe, expected_tpe):
        """Assert pairwise dict equality of two sequences of extent dicts."""
        for actual, expected in zip(actual_tpe, expected_tpe):
            self.assertDictEqual(actual, expected)

    def test_encoded_tpextents(self):
        """Encoding the collected values matches serializing the expected ones."""
        actual_encoded = [
            temporal_projected_extent_encoder(x) for x in self.rdd.collect()
        ]

        # Work on copies: ``expected_tpextents`` is a class attribute that
        # ``test_decoded_tpextents`` also reads, so its dict-form extents must
        # survive this test regardless of execution order.
        expected_encoded = [
            to_pb_temporal_projected_extent(
                TemporalProjectedExtent(**dict(ex, extent=Extent(**ex['extent'])))
            ).SerializeToString()
            for ex in self.expected_tpextents
        ]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_tpextents(self):
        """The decoded RDD contents equal the expected extent dicts."""
        self.result_checker(self.collected, self.expected_tpextents)
def test_to_spatial_raster_layer(self):
    """Keys of the spatial layer match the four expected extents, in order."""
    collected_pairs = self.raster_rdd.to_spatial_layer().to_numpy_rdd().collect()
    actual = [key for key, _ in collected_pairs]

    expected = [
        ProjectedExtent(Extent(0, 0, 1, 1), 3857),
        ProjectedExtent(Extent(1, 0, 2, 1), 3857),
        ProjectedExtent(Extent(0, 1, 1, 2), 3857),
        ProjectedExtent(Extent(1, 1, 2, 2), 3857),
    ]

    # Pairwise comparison: zip stops at the shorter of the two sequences.
    for found, wanted in zip(actual, expected):
        self.assertEqual(found, wanted)
def test_to_spatial_raster_layer(self):
    """Every key of the spatial layer is one of the four expected extents."""
    expected = [
        ProjectedExtent(Extent(0, 0, 1, 1), 3857),
        ProjectedExtent(Extent(1, 0, 2, 1), 3857),
        ProjectedExtent(Extent(0, 1, 1, 2), 3857),
        ProjectedExtent(Extent(1, 1, 2, 2), 3857),
    ]

    spatial_layer = self.raster_rdd.to_spatial_layer()
    actual = spatial_layer.to_numpy_rdd().keys().collect()

    # Membership only; the order in which keys are collected is not checked.
    for key in actual:
        self.assertTrue(key in expected)
def test_query2(self):
    """Querying zoom 11 with a small EPSG:3857 extent yields key (1450, 996) first."""
    intersection = Extent(
        8348915.46680623,
        543988.943201519,
        8348915.4669,
        543988.943201520,
    )

    queried = query(self.uri, self.layer_name, 11, intersection, query_proj=3857)
    first_key = queried.to_numpy_rdd().first()[0]

    self.assertEqual(first_key, SpatialKey(1450, 996))
class RasterizeTest(BaseTestClass):
    """Rasterizes one square polygon and inspects the first resulting tile."""

    extent = Extent(0.0, 0.0, 11.0, 11.0)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the py4j gateway after each test."""
        yield
        BaseTestClass.pysc._gateway.close()

    def _assert_first_tile_all_nan(self, crs):
        """Rasterize the square with ``crs`` and assert every first-tile cell is NaN."""
        square = Polygon([(0, 11), (11, 11), (11, 0), (0, 0)])
        layer = rasterize([square], crs, 11, 1)

        first_tile = layer.to_numpy_rdd().first()[1]
        for value in first_tile.cells.flatten().tolist():
            self.assertTrue(math.isnan(value))

    def test_whole_area(self):
        """CRS given as the string ``"EPSG:3857"``."""
        self._assert_first_tile_all_nan("EPSG:3857")

    def test_whole_area_integer_crs(self):
        """CRS given as the integer EPSG code ``3857``."""
        self._assert_first_tile_all_nan(3857)
def from_dataframe(dataframe, target_extent=None):
    """Reads OSM data from a Spark ``DataFrame``.

    The data is returned as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        dataframe (DataFrame): A Spark ``DataFrame`` that contains the OSM data.
        target_extent (:class:`~geopyspark.geotrellis.Extent` or ``shapely.geometry.Polygon``, optional):
            The area of interest. Only features inside this ``Extent`` will be
            returned. Default is ``None``, in which case all of the features
            are returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        # A Polygon is first converted to an Extent; either way the reader
        # receives the extent in its dict form.
        as_extent = (Extent.from_polygon(target_extent)
                     if isinstance(target_extent, Polygon)
                     else target_extent)
        target_extent = as_extent._asdict()

    osm_reader = get_spark_context()._jvm.geopyspark.vectorpipe.io.OSMReader
    return FeaturesCollection(osm_reader.fromDataFrame(dataframe._jdf, target_extent))
def test_local_pyramid(self):
    """Pyramid a 250x250 tile under ``LocalLayout(250)`` and ``LocalLayout(25)``."""
    source_tile = Tile(np.zeros((1, 250, 250)), 'FLOAT', None)
    source_key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)

    rdd = BaseTestClass.pysc.parallelize([(source_key, source_tile)])
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    # Single tile is at level 0
    single = raster_rdd.tile_to_layout(LocalLayout(250)).pyramid()
    assert single.max_zoom == 0

    result = raster_rdd.tile_to_layout(LocalLayout(25)).pyramid()
    assert result.max_zoom == 4

    # Expected number of layout columns per zoom level, highest level first.
    for zoom, cols in ((4, 10), (3, 5), (2, 3), (1, 2), (0, 1)):
        assert result.levels[zoom].layer_metadata.tile_layout.layoutCols == cols
def test_encoded_extents(self):
    """Encoding each collected extent matches serializing the expected extent."""
    expected_encoded = []
    for extent_kwargs in self.expected_extents:
        proto = to_pb_extent(Extent(**extent_kwargs))
        expected_encoded.append(proto.SerializeToString())

    actual_encoded = [extent_encoder(item) for item in self.collected]

    for got, wanted in zip(actual_encoded, expected_encoded):
        self.assertEqual(got, wanted)
def test_correct_base(self):
    """Pyramid a single 16x16 tile laid out on ``GlobalLayout(tile_size=16)``.

    The resulting pyramid is handed to ``self.pyramid_building_check`` for
    validation.
    """
    arr = np.zeros((1, 16, 16))
    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)

    tile = Tile(arr, 'FLOAT', False)
    projected_extent = ProjectedExtent(extent, epsg_code)

    rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    # The layout comes solely from GlobalLayout; the previously built
    # TileLayout/LayoutDefinition locals were never used and are gone.
    laid_out = raster_rdd.tile_to_layout(GlobalLayout(tile_size=16))

    result = laid_out.pyramid()
    self.pyramid_building_check(result)
def test_to_spatial_target_time_raster_layer(self):
    """Converting with ``target_time=self.time_1`` keeps known keys and all-1.0 tiles."""
    expected = [
        ProjectedExtent(Extent(0, 0, 1, 1), 3857),
        ProjectedExtent(Extent(1, 0, 2, 1), 3857),
        ProjectedExtent(Extent(0, 1, 1, 2), 3857),
        ProjectedExtent(Extent(1, 1, 2, 2), 3857),
    ]

    converted = self.raster_rdd.to_spatial_layer(target_time=self.time_1)
    keys = converted.to_numpy_rdd().keys().collect()
    values = converted.to_numpy_rdd().values().collect()

    for key in keys:
        self.assertTrue(key in expected)

    # Every tile has the shape of ``tile_1`` and holds only 1.0.
    reference_shape = self.tile_1.cells.shape
    for tile in values:
        self.assertEqual(tile.cells.shape, reference_shape)
        self.assertTrue((tile.cells == 1.0).all())
def test_tile_to_tiled_layer_layout(self):
    """Tiling to the layout of an already tiled layer reproduces its metadata."""
    layout_definition = LayoutDefinition(
        Extent(0., 0., 10., 6.),
        TileLayout(2, 2, 5, 5),
    )

    base = self.layer.tile_to_layout(layout_definition)
    # Second pass: the tiled layer itself is passed as the target layout.
    tiled = self.layer.tile_to_layout(layout=base)

    tiled_md = tiled.layer_metadata.to_dict()
    base_md = base.layer_metadata.to_dict()
    self.assertDictEqual(tiled_md, base_md)
def from_pb_extent(pb_extent):
    """Builds an ``Extent`` out of a ``ProtoExtent``.

    Args:
        pb_extent (ProtoExtent): The ``ProtoExtent`` instance to convert.

    Returns:
        :class:`~geopyspark.geotrellis.Extent`
    """

    # Positional order expected by Extent: xmin, ymin, xmax, ymax.
    bounds = (pb_extent.xmin, pb_extent.ymin, pb_extent.xmax, pb_extent.ymax)
    return Extent(*bounds)
def test_encoded_pextents(self):
    """Encoding the RDD's projected extents matches serializing the expected ones."""
    actual_encoded = [projected_extent_encoder(x) for x in self.rdd.collect()]

    # Build the Extent-bearing kwargs on copies so that the shared
    # ``self.projected_extents`` dicts keep their original dict-form extents
    # for any other test that reads them.
    expected_encoded = [
        to_pb_projected_extent(
            ProjectedExtent(**dict(pex, extent=Extent(**pex['extent'])))
        ).SerializeToString()
        for pex in self.projected_extents
    ]

    for actual, expected in zip(actual_encoded, expected_encoded):
        self.assertEqual(actual, expected)
def test_tile_to_layout_layout_definition(self):
    """Re-tiling onto an enlarged ``LayoutDefinition`` reports the new extent."""
    current = self.tiled_layer.layer_metadata.layout_definition
    old = current.extent

    # Same lower-left corner; upper-right corner pushed out by 15 units.
    new_extent = Extent(old.xmin, old.ymin, old.xmax + 15.0, old.ymax + 15.0)
    new_layout_definition = LayoutDefinition(extent=new_extent,
                                             tileLayout=current.tileLayout)

    retiled = self.tiled_layer.tile_to_layout(new_layout_definition)
    actual = retiled.layer_metadata.layout_definition.extent

    self.assertEqual(actual, new_extent)
def test_pyraminding_with_partitioner(self):
    """``pyramid(partition_strategy=...)`` is reflected on pyramid level 0."""
    arr = np.zeros((1, 16, 16))
    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)

    tile = Tile(arr, 'FLOAT', False)
    projected_extent = ProjectedExtent(extent, epsg_code)

    rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    # The layout comes solely from GlobalLayout; the previously built
    # TileLayout/LayoutDefinition locals were never used and are gone.
    laid_out = raster_rdd.tile_to_layout(GlobalLayout(tile_size=16))

    strategy = SpatialPartitionStrategy(4)
    pyramided = laid_out.pyramid(partition_strategy=strategy)

    self.assertEqual(pyramided.levels[0].get_partition_strategy(), strategy)
def test_correct_base(self):
    """Pyramid zoom 5 down to zoom 1 on a 32x32 layout of 16x16 tiles."""
    geopysc = BaseTestClass.geopysc

    tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': False}
    projected_extent = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}

    rdd = geopysc.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(geopysc, SPATIAL, rdd)

    tile_layout = TileLayout(32, 32, 16, 16)
    new_extent = Extent(-20037508.342789244,
                        -20037508.342789244,
                        20037508.342789244,
                        20037508.342789244)

    # Metadata is collected against the explicit extent and tile layout.
    metadata = raster_rdd.collect_metadata(extent=new_extent, layout=tile_layout)
    laid_out = raster_rdd.tile_to_layout(metadata)

    self.pyramid_building_check(laid_out.pyramid(start_zoom=5, end_zoom=1))
def from_orc(source, target_extent=None):
    """Reads in OSM data from an orc file that is located either locally or on S3. The
    resulting data will be read in as an instance of
    :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`.

    Args:
        source (str): The path or URI to the orc file to be read. Can either be a local file, or
            a file on S3.

            Note:
                Reading a file from S3 requires additional setup depending on the environment
                and how the file is being read.

                The following describes the parameters that need to be set depending on
                how the files are to be read in. However, **if reading a file on EMR, then
                the access key and secret key do not need to be set**.

                If using ``s3a://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3a.impl``
                    - ``spark.hadoop.fs.s3a.access.key``
                    - ``spark.hadoop.fs.s3a.secret.key``

                If using ``s3n://``, then the following ``SparkConf`` parameters need to be set:
                    - ``spark.hadoop.fs.s3n.access.key``
                    - ``spark.hadoop.fs.s3n.secret.key``

                An alternative to passing in your S3 credentials to ``SparkConf`` would be
                to export them as environment variables:
                    - ``AWS_ACCESS_KEY_ID=YOUR_KEY``
                    - ``AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY``
        target_extent (:class:`~geopyspark.geotrellis.Extent` or ``shapely.geometry.Polygon``, optional):
            The area of interest. Only features inside this ``Extent`` will be returned. Default
            is, ``None``. If ``None``, then all of the features will be returned.

    Returns:
        :class:`~geopyspark.vector_pipe.features_collection.FeaturesCollection`
    """

    if target_extent:
        # A Polygon is first converted to an Extent; either way the reader
        # receives the extent in its dict form.
        if isinstance(target_extent, Polygon):
            target_extent = Extent.from_polygon(target_extent)._asdict()
        else:
            target_extent = target_extent._asdict()

    pysc = get_spark_context()
    # The session is built from the active context's configuration, with
    # Hive support enabled.
    session = SparkSession.builder.config(conf=pysc.getConf()).enableHiveSupport().getOrCreate()
    features = pysc._jvm.geopyspark.vectorpipe.io.OSMReader.fromORC(session._jsparkSession, source, target_extent)

    return FeaturesCollection(features)
def test_encoded_tuples(self):
    """Each collected tuple serializes to the bytes of a hand-built ``ProtoTuple``."""
    proto_tuple = tupleMessages_pb2.ProtoTuple()

    # Copy instead of assigning into ``self.extent``: the dict is shared test
    # state, and overwriting its 'extent' entry with an ``Extent`` would leak
    # into other tests and break a second run of this one.
    extent_kwargs = dict(self.extent, extent=Extent(**self.extent['extent']))
    proto_extent = to_pb_projected_extent(ProjectedExtent(**extent_kwargs))
    proto_multiband = to_pb_multibandtile(self.multiband_dict)

    proto_tuple.projectedExtent.CopyFrom(proto_extent)
    proto_tuple.tiles.CopyFrom(proto_multiband)

    expected_bytes = proto_tuple.SerializeToString()
    actual_encoded = [self.ser.dumps(x) for x in self.collected]

    for actual in actual_encoded:
        self.assertEqual(expected_bytes, actual)
def test_to_ud_ubyte(self):
    """Converting to ``CellType.UINT8`` with ``no_data_value=-1`` keeps that value."""
    cells = np.array([[0.4324323432124, 0.0, 0.0],
                      [1.0, 1.0, 1.0]], dtype=float)

    key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
    source_tile = Tile(cells, 'FLOAT', float('nan'))

    rdd = BaseTestClass.pysc.parallelize([(key, source_tile)])
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    converted = raster_rdd.convert_data_type(CellType.UINT8, no_data_value=-1)

    # first() returns a (key, tile) pair; only the tile is inspected.
    _, converted_tile = converted.to_numpy_rdd().first()
    self.assertEqual(converted_tile.no_data_value, -1)
def test_no_data_deserialization(self):
    """A SHORT tile made only of its no-data value round-trips unchanged."""
    # One band of 4x4 cells, every cell equal to the no-data value.
    arr = np.array([[[-32768] * 4] * 4], dtype=np.int16)

    key = ProjectedExtent(Extent(0.0, 0.0, 10.0, 10.0), 3857)
    tile = Tile(arr, 'SHORT', -32768)

    rdd = BaseTestClass.pysc.parallelize([(key, tile)])
    raster_layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    _, actual_tile = raster_layer.to_numpy_rdd().first()

    self.assertEqual(actual_tile.cell_type, tile.cell_type)
    self.assertEqual(actual_tile.no_data_value, tile.no_data_value)
    self.assertTrue((actual_tile.cells == tile.cells).all())
class MinMaxTest(BaseTestClass):
    """Checks ``RasterLayer.get_min_max`` on single-tile layers."""

    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)
    projected_extent = ProjectedExtent(extent, epsg_code)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the py4j gateway after each test."""
        yield
        BaseTestClass.pysc._gateway.close()

    def _min_max_of(self, tile):
        """Wrap ``tile`` in a one-element spatial layer and return its min/max."""
        rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
        layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
        return layer.get_min_max()

    def test_all_zeros(self):
        """An all-zero INT tile reports (0.0, 0.0)."""
        cells = np.zeros((1, 16, 16)).astype('int')
        min_max = self._min_max_of(Tile(cells, 'INT', -500))

        self.assertEqual((0.0, 0.0), min_max)

    def test_multibands(self):
        """Four bands holding 1, 2, 3 and 4 report (1.0, 4.0)."""
        cells = np.array([[[1, 1, 1, 1]],
                          [[2, 2, 2, 2]],
                          [[3, 3, 3, 3]],
                          [[4, 4, 4, 4]]], dtype=int)
        min_max = self._min_max_of(Tile(cells, 'INT', -500))

        self.assertEqual((1.0, 4.0), min_max)

    def test_floating(self):
        """A FLOAT tile with values from 0.0 to 2.0 reports (0.0, 2.0)."""
        cells = np.array([[[0.0, 0.0, 0.0, 0.0],
                           [1.0, 1.0, 1.0, 1.0],
                           [1.5, 1.5, 1.5, 1.5],
                           [2.0, 2.0, 2.0, 2.0]]], dtype=float)
        min_max = self._min_max_of(Tile(cells, 'FLOAT', float('nan')))

        self.assertEqual((0.0, 2.0), min_max)
def test_wrong_cols_and_rows(self):
    """Pyramiding a layer collected with ``tile_size=250`` raises ``ValueError``."""
    geopysc = BaseTestClass.geopysc

    tile = {'data': np.zeros((1, 250, 250)), 'no_data_value': False}
    projected_extent = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}

    rdd = geopysc.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(geopysc, SPATIAL, rdd)

    metadata = raster_rdd.collect_metadata(tile_size=250)
    laid_out = raster_rdd.tile_to_layout(metadata)

    with pytest.raises(ValueError):
        laid_out.pyramid(start_zoom=12, end_zoom=1)
def test_pyramid_class(self):
    """Checks zoom bookkeeping and the histogram of a pyramid of zeros.

    A single all-zero 16x16 tile is laid out on ``GlobalLayout(tile_size=16)``
    in EPSG:3857 and pyramided. The pyramid's ``max_zoom`` must equal the
    layer's ``zoom_level``, levels 1 through 12 must be present, and the
    histogram must report a mean of 0.0 and a min/max of (0.0, 0.0).
    """
    arr = np.zeros((1, 16, 16))
    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)

    tile = Tile(arr, 'FLOAT', False)
    projected_extent = ProjectedExtent(extent, epsg_code)

    rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

    # The unused ``TileLayout(1, 1, 16, 16)`` local was dropped: the layout
    # is fully determined by the GlobalLayout passed here.
    reprojected = raster_rdd.tile_to_layout(layout=GlobalLayout(tile_size=16),
                                            target_crs=3857)

    result = reprojected.pyramid()
    hist = result.get_histogram()

    self.assertEqual(result.max_zoom, reprojected.zoom_level)
    self.assertTrue(set(result.levels.keys()).issuperset(range(1, 13)))
    self.assertEqual(hist.mean(), 0.0)
    self.assertEqual(hist.min_max(), (0.0, 0.0))
class WithNoDataTest(BaseTestClass):
    """Checks ``with_no_data`` on both a raster layer and its tiled counterpart."""

    epsg_code = 3857
    extent = Extent(0.0, 0.0, 10.0, 10.0)
    projected_extent = ProjectedExtent(extent, epsg_code)

    # Shared fixture: one all-zero FLOAT tile whose no-data value is -500.0.
    arr = np.zeros((1, 16, 16))
    tile = Tile(arr, 'FLOAT', -500.0)

    rdd = BaseTestClass.pysc.parallelize([(projected_extent, tile)])
    layer = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)
    tiled_layer = layer.tile_to_layout()

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the py4j gateway after each test."""
        yield
        BaseTestClass.pysc._gateway.close()

    def test_with_no_data_raster_layers(self):
        """``with_no_data(-10)`` shows up on the tile and in collected metadata."""
        updated = self.layer.with_no_data(-10)

        _, first_tile = updated.to_numpy_rdd().first()
        self.assertEqual(first_tile.no_data_value, -10)

        collected = updated.collect_metadata()
        self.assertEqual(collected.cell_type, "float32ud-10.0")
        self.assertEqual(collected.no_data_value, -10)

    def test_with_no_data_tiled_raster_layers(self):
        """``with_no_data(18)`` shows up on the tile and in the layer metadata."""
        updated = self.tiled_layer.with_no_data(18)

        _, first_tile = updated.to_numpy_rdd().first()
        self.assertEqual(first_tile.no_data_value, 18)

        layer_md = updated.layer_metadata
        self.assertEqual(layer_md.cell_type, "float32ud18.0")
        self.assertEqual(layer_md.no_data_value, 18)
def make_raster(x, y, v, cols=4, rows=4, ct=CellType.FLOAT32, crs=4326):
    """Build a ``(ProjectedExtent, Tile)`` pair holding one constant-valued band.

    Args:
        x: Minimum x of the extent.
        y: Minimum y of the extent.
        v: Value written into every cell.
        cols: Number of columns; also the width of the extent.
        rows: Number of rows; also the height of the extent.
        ct: Cell type passed to ``Tile``.
        crs: CRS passed to ``ProjectedExtent``.

    Returns:
        tuple: ``(ProjectedExtent, Tile)``; the tile's no-data value is ``None``.
    """
    band = np.zeros((1, rows, cols))
    band.fill(v)

    # extent of a single cell is 1, no fence-post here
    bounds = Extent(x, y, x + cols, y + rows)
    key = ProjectedExtent(bounds, crs)

    return (key, Tile(band, ct, None))
class MergeTest(BaseTestClass):
    """Checks ``merge()`` for each key type of raster and tiled raster layers.

    Every test feeds two distinct keys and expects the merged layer to hold
    two records whose cells all equal ``arr_2`` (all ones).
    """

    arr_1 = np.zeros((1, 4, 4))
    arr_2 = np.ones((1, 4, 4))

    tile_1 = Tile.from_numpy_array(arr_1)
    tile_2 = Tile.from_numpy_array(arr_2)

    crs = 4326
    time = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    extents = [
        Extent(0.0, 0.0, 4.0, 4.0),
        Extent(0.0, 4.0, 4.0, 8.0),
    ]

    extent = Extent(0.0, 0.0, 8.0, 8.0)
    layout = TileLayout(2, 2, 5, 5)

    ct = 'float32ud-1.0'
    md_proj = '+proj=longlat +datum=WGS84 +no_defs '
    ld = LayoutDefinition(extent, layout)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the py4j gateway after each test."""
        yield
        BaseTestClass.pysc._gateway.close()

    def _assert_merges_to_ones(self, layer):
        """Merge ``layer``; expect two records, each with cells equal to ``arr_2``."""
        merged = layer.merge()

        self.assertEqual(merged.srdd.rdd().count(), 2)

        for _, merged_tile in merged.to_numpy_rdd().collect():
            self.assertTrue((merged_tile.cells == self.arr_2).all())

    def test_projected_extent(self):
        """Merge a spatial ``RasterLayer`` keyed by ``ProjectedExtent``."""
        first, second = (
            ProjectedExtent(extent=self.extents[0], epsg=self.crs),
            ProjectedExtent(extent=self.extents[1], epsg=self.crs),
        )

        records = [
            (first, self.tile_1),
            (first, self.tile_2),
            (second, self.tile_1),
            (second, self.tile_2),
        ]

        rdd = self.pysc.parallelize(records)
        self._assert_merges_to_ones(
            RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd))

    def test_temporal_projected_extent(self):
        """Merge a space-time ``RasterLayer`` keyed by ``TemporalProjectedExtent``."""
        first, second = (
            TemporalProjectedExtent(extent=self.extents[0], epsg=self.crs, instant=self.time),
            TemporalProjectedExtent(extent=self.extents[1], epsg=self.crs, instant=self.time),
        )

        records = [
            (first, self.tile_1),
            (second, self.tile_1),
            (first, self.tile_2),
            (second, self.tile_2),
        ]

        rdd = self.pysc.parallelize(records)
        self._assert_merges_to_ones(
            RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd))

    def test_spatial_keys(self):
        """Merge a spatial ``TiledRasterLayer`` keyed by ``SpatialKey``."""
        first, second = SpatialKey(0, 0), SpatialKey(0, 1)

        records = [
            (first, self.tile_1),
            (second, self.tile_1),
            (first, self.tile_2),
            (second, self.tile_2),
        ]

        md = Metadata(bounds=Bounds(first, second),
                      crs=self.md_proj,
                      cell_type=self.ct,
                      extent=self.extent,
                      layout_definition=self.ld)

        rdd = self.pysc.parallelize(records)
        self._assert_merges_to_ones(
            TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, md))

    def test_space_time_keys(self):
        """Merge a space-time ``TiledRasterLayer`` keyed by ``SpaceTimeKey``."""
        first = SpaceTimeKey(0, 0, instant=self.time)
        second = SpaceTimeKey(0, 1, instant=self.time)

        # All four records carry ``tile_2`` in this test.
        records = [
            (first, self.tile_2),
            (second, self.tile_2),
            (first, self.tile_2),
            (second, self.tile_2),
        ]

        temp_md = Metadata(bounds=Bounds(first, second),
                           crs=self.md_proj,
                           cell_type=self.ct,
                           extent=self.extent,
                           layout_definition=self.ld)

        rdd = self.pysc.parallelize(records)
        self._assert_merges_to_ones(
            TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, temp_md))
class TestMultipleDates(TestCase):
    """Tests for ``GeotrellisTimeSeriesImageCollection`` on a three-date cube.

    The class body builds a 2x2 grid of 5x5 tiles for three instants
    (``tiled_raster_rdd``) and an untiled space-time layer (``raster_rdd``).
    ``tile`` holds 1.0 with a no-data cell at [0][0]; ``tile2`` holds 2.0 with
    a no-data cell at [2][2]. The no-data value is -1.0.
    """

    band1 = np.array([
        [-1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, -1.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0],
        [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)
    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    # Written out explicitly: a comprehension in a class body cannot see the
    # class-level names (tile, time_1, ...).
    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {'cellType': 'float32ud-1.0',
                'extent': extent,
                'crs': '+proj=longlat +datum=WGS84 +no_defs ',
                'bounds': {
                    'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(time_1)},
                    'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(time_3)}
                },
                'layoutDefinition': {
                    'extent': extent,
                    'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2}
                }
                }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        """Make sure the ``tmp`` folder under the working directory exists."""
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    def _image_collection(self, *extra_args):
        """Wrap ``tiled_raster_rdd`` (as pyramid level 0) in an image collection.

        ``extra_args`` are passed on after the service registry.
        """
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        return GeotrellisTimeSeriesImageCollection(
            pyramid, InMemoryServiceRegistry(), *extra_args)

    @staticmethod
    def _stitched_reduce(imagecollection, reducer):
        """Reduce over the temporal dimension and stitch pyramid level 0."""
        return imagecollection.reduce(reducer, "temporal").pyramid.levels[0].stitch()

    @staticmethod
    def _blue_band_metadata():
        """Return a fresh single-band metadata dict used by the UDF tests."""
        return {
            "bands": [
                {
                    "band_id": "2",
                    "name": "blue",
                    "wavelength_nm": 496.6,
                    "res_m": 10,
                    "scale": 0.0001,
                    "offset": 0,
                    "type": "int16",
                    "unit": "1"
                }
            ]
        }

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        """Build a 1x1 tile holding ``value``."""
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        """Build a space-time tiled layer of 1x1 tiles, one per datetime.

        Args:
            grid_value_by_datetime: Mapping of datetime to the cell value.
            no_data: No-data value for the tiles and the cell type string.
        """
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        # NOTE(review): ``elem`` returns a one-element list, so the RDD
        # elements are lists rather than (key, tile) tuples -- confirm this
        # is the shape ``from_numpy_rdd`` expects.
        layer = [elem(timestamp, value) for timestamp, value in sorted_by_datetime.items()]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 1, 'tileRows': 1}
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(datetimes[0])},
                'maxKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(datetimes[-1])}
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_reproject_spatial(self):
        """Resampling to EPSG:3857 yields a mercator CRS and a downloadable GeoTIFF."""
        imagecollection = self._image_collection()

        resampled = imagecollection.resample_spatial(resolution=0, projection="EPSG:3857", method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertTrue("proj=merc" in metadata.crs)

        path = str(self.temp_folder / "reprojected.tiff")
        resampled.reduce('max', 'temporal').download(path, format="GTIFF", parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)

    def test_reduce(self):
        """Temporal reducers on the class-level cube, checked at cells [0][0][0] and [0][0][1]."""
        imagecollection = self._image_collection()

        # (reducer, expected cell [0][0][0], expected cell [0][0][1])
        exact_cases = [
            ("max", 2.0, 2.0),
            ("min", 2.0, 1.0),
            ("sum", 2.0, 4.0),
        ]
        for reducer, first, second in exact_cases:
            stitched = self._stitched_reduce(imagecollection, reducer)
            print(stitched)
            self.assertEqual(first, stitched.cells[0][0][0])
            self.assertEqual(second, stitched.cells[0][0][1])

        # For these reducers the second cell is only compared approximately.
        approximate_cases = [
            ("mean", 2.0, 1.3333333),
            ("variance", 0.0, 0.2222222),
            ("sd", 0.0, 0.4714045),
        ]
        for reducer, first, second in approximate_cases:
            stitched = self._stitched_reduce(imagecollection, reducer)
            print(stitched)
            self.assertEqual(first, stitched.cells[0][0][0])
            self.assertAlmostEqual(second, stitched.cells[0][0][1])

    def test_reduce_all_data(self):
        """Temporal reducers over two single-pixel dates holding 1.0 and 5.0."""
        pyramid = Pyramid({0: self._single_pixel_layer({
            datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 1.0,
            datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 5.0
        })})

        imagecollection = GeotrellisTimeSeriesImageCollection(pyramid, InMemoryServiceRegistry())

        for reducer, expected in (("min", 1.0), ("max", 5.0), ("sum", 6.0)):
            stitched = self._stitched_reduce(imagecollection, reducer)
            self.assertEqual(expected, stitched.cells[0][0][0])

        for reducer, expected in (("mean", 3.0), ("variance", 4.0), ("sd", 2.0)):
            stitched = self._stitched_reduce(imagecollection, reducer)
            self.assertAlmostEqual(expected, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_some_nodata(self):
        """Temporal reducers over two single-pixel dates: one no-data, one 5.0."""
        no_data = -1.0

        pyramid = Pyramid({0: self._single_pixel_layer({
            datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): no_data,
            datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 5.0
        }, no_data)})

        imagecollection = GeotrellisTimeSeriesImageCollection(pyramid, InMemoryServiceRegistry())

        for reducer, expected in (("min", 5.0), ("max", 5.0), ("sum", 5.0)):
            stitched = self._stitched_reduce(imagecollection, reducer)
            self.assertEqual(expected, stitched.cells[0][0][0])

        for reducer, expected in (("mean", 5.0), ("variance", 0.0), ("sd", 0.0)):
            stitched = self._stitched_reduce(imagecollection, reducer)
            self.assertAlmostEqual(expected, stitched.cells[0][0][0], delta=0.001)

    def test_reduce_tiles(self):
        """Print the numpy variance of two stacked single-pixel tiles (no assertions)."""
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"
        var = np.var(cube, axis=0)
        print(var)

    def test_reduce_nontemporal(self):
        """Reducing over the "spectral" dimension raises ``AttributeError``."""
        imagecollection = self._image_collection()

        with self.assertRaises(AttributeError) as context:
            imagecollection.reduce("max", "spectral").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """``aggregate_temporal`` with "max" runs and stitches (result only printed)."""
        imagecollection = self._image_collection()
        aggregated = imagecollection.aggregate_temporal(["2017-01-01", "2018-01-01"], ["2017-01-03"], "max")
        stitched = aggregated.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

    def test_max_aggregator(self):
        """``max_composite`` of ``tile`` and ``tile2`` gives 2.0 at cell [0][0]."""
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):
        """Temporal max gives 2.0 at cell [0][0][0]."""
        imagecollection = self._image_collection()

        stitched = self._stitched_reduce(imagecollection, 'max')
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        """Temporal min/max cubes, checked by stitching and by point time series."""
        imagecollection = self._image_collection()

        min_time = imagecollection.reduce('min', 'temporal')
        max_time = imagecollection.reduce('max', 'temporal')

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(imagecollection.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_spatiotemporal(self):
        """Apply the ``datacube_reduce_time_sum.py`` UDF shipped with ``openeo_udf``."""
        import os

        import openeo_udf.functions

        imagecollection = self._image_collection(self._blue_band_metadata())

        udf_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(udf_dir, "datacube_reduce_time_sum.py")
        with open(file_name, "r") as f:
            udf_code = f.read()

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        stitched = result.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

        self.assertEqual(2, stitched.cells[0][0][0])
        self.assertEqual(6, stitched.cells[0][0][5])
        self.assertEqual(4, stitched.cells[0][5][6])

    def test_apply_dimension_spatiotemporal(self):
        """A UDF that returns its input unchanged leaves every tile (almost) equal."""
        imagecollection = self._image_collection(self._blue_band_metadata())

        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data
"""

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)
        self.assertEqual(len(TestMultipleDates.layer), len(local_tiles))

        # The reference tiles are converted to FLOAT64 before comparison.
        reference_tiles = imagecollection.pyramid.levels[0].convert_data_type(
            CellType.FLOAT64).to_numpy_rdd().collect()
        ref_dict = {e[0]: e[1] for e in reference_tiles}
        result_dict = {e[0]: e[1] for e in local_tiles}

        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells), np.squeeze(tile.cells), decimal=2)

    def test_mask_raster(self):
        """Masking with ``replacement=10.0`` followed by a temporal max."""

        def create_mask(tile):
            # The mask equals the data except that cell [0][0][0] is set to 0.0.
            tile.cells[0][0][0] = 0.0
            return tile

        mask_layer = self.tiled_raster_rdd.map_tiles(create_mask)
        mask = Pyramid({0: mask_layer})

        imagecollection = self._image_collection()
        masked = imagecollection.mask(
            rastermask=GeotrellisTimeSeriesImageCollection(mask, InMemoryServiceRegistry()),
            replacement=10.0)
        stitched = self._stitched_reduce(masked, 'max')
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(10.0, stitched.cells[0][0][1])

    def test_apply_kernel(self):
        """Apply a plus-shaped 3x3 kernel with factor 2.0, then a temporal max."""
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        imagecollection = self._image_collection()
        stitched = self._stitched_reduce(imagecollection.apply_kernel(kernel, 2.0), 'max')

        self.assertEqual(12.0, stitched.cells[0][0][0])
        self.assertEqual(16.0, stitched.cells[0][0][1])
        self.assertEqual(20.0, stitched.cells[0][1][1])

    def test_resample_spatial(self):
        """Resampling to resolution 0.05 is reflected in the downloaded GeoTIFF."""
        imagecollection = self._image_collection()

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        resampled.reduce('max', 'temporal').download(path, format="GTIFF", parameters={'tiled': True})

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)
class TestMultipleDates(TestCase):
    """Tests for ``GeopysparkDataCube`` on a 2x2-tile space-time layer with three dates.

    All fixtures are class attributes, so the RDDs and layers below are
    created once, when the class body is executed.
    """

    # Two 5x5 bands; -1.0 is passed as the no-data value when the tiles are built.
    band1 = np.array([[-1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0],
                      [1.0, 1.0, 1.0, 1.0, 1.0]])

    band2 = np.array([[2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, -1.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0],
                      [2.0, 2.0, 2.0, 2.0, 2.0]])

    tile = Tile.from_numpy_array(band1, no_data_value=-1.0)
    tile2 = Tile.from_numpy_array(band2, no_data_value=-1.0)

    time_1 = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')
    time_2 = datetime.datetime.strptime("2017-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')
    time_3 = datetime.datetime.strptime("2017-10-17T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    # Space-time keyed tiles: four spatial keys for each of the three instants.
    layer = [(SpaceTimeKey(0, 0, time_1), tile),
             (SpaceTimeKey(1, 0, time_1), tile2),
             (SpaceTimeKey(0, 1, time_1), tile),
             (SpaceTimeKey(1, 1, time_1), tile),
             (SpaceTimeKey(0, 0, time_2), tile2),
             (SpaceTimeKey(1, 0, time_2), tile2),
             (SpaceTimeKey(0, 1, time_2), tile2),
             (SpaceTimeKey(1, 1, time_2), tile2),
             (SpaceTimeKey(0, 0, time_3), tile),
             (SpaceTimeKey(1, 0, time_3), tile2),
             (SpaceTimeKey(0, 1, time_3), tile),
             (SpaceTimeKey(1, 1, time_3), tile)]

    rdd = SparkContext.getOrCreate().parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(time_1)},
            'maxKey': {'col': 1, 'row': 1, 'instant': _convert_to_unix_time(time_3)}
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2}
        }
    }
    collection_metadata = GeopysparkCubeMetadata({
        "cube:dimensions": {
            "t": {"type": "temporal"},
        }
    })

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    # NOTE(review): for time_2 and time_3 the extent (1, 0, 2, 1) is listed twice
    # and (0, 0, 1, 1) is absent. This looks like a copy/paste slip but is kept
    # unchanged here; confirm before relying on `raster_rdd`.
    layer2 = [(TemporalProjectedExtent(Extent(0, 0, 1, 1), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_1), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_2), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 0, 2, 1), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(0, 1, 1, 2), epsg=3857, instant=time_3), tile),
              (TemporalProjectedExtent(Extent(1, 1, 2, 2), epsg=3857, instant=time_3), tile)]

    rdd2 = SparkContext.getOrCreate().parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd2)

    points = [
        Point(1.0, -3.0),
        Point(0.5, 0.5),
        Point(20.0, 3.0),
        Point(1.0, -2.0),
        Point(-10.0, 15.0)
    ]

    def setUp(self):
        """Make sure the ``tmp`` folder under the current working directory exists."""
        # TODO: make this reusable (or a pytest fixture)
        self.temp_folder = Path.cwd() / 'tmp'
        if not self.temp_folder.exists():
            self.temp_folder.mkdir()
        assert self.temp_folder.is_dir()

    def _cube_and_mask_cube(self):
        """Return ``(cube, mask_cube)`` as used by the ``test_mask_raster_*`` tests.

        The mask cube is the input layer with ``cells[0][0][0]`` of every tile
        set to 0.0; it is created without metadata.
        """
        def create_mask(tile):
            tile.cells[0][0][0] = 0.0
            return tile

        pyramid = Pyramid({0: self.tiled_raster_rdd})
        mask_layer = self.tiled_raster_rdd.map_tiles(create_mask)
        mask_pyramid = Pyramid({0: mask_layer})

        cube = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        mask_cube = GeopysparkDataCube(pyramid=mask_pyramid)
        return cube, mask_cube

    def test_reproject_spatial(self):
        """Reproject to EPSG:3395 and compare the GeoTIFF with an unprojected reference."""
        # Local import, consistent with test_resample_spatial.
        import rasterio

        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        ref_path = str(self.temp_folder / "reproj_ref.tiff")
        imagecollection.reduce('max', dimension="t").save_result(ref_path, format="GTIFF")

        resampled = imagecollection.resample_spatial(resolution=0, projection="EPSG:3395", method="max")
        metadata = resampled.pyramid.levels[0].layer_metadata
        print(metadata)
        self.assertIn("proj=merc", metadata.crs)

        path = str(self.temp_folder / "reprojected.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        with rasterio.open(ref_path) as ref_ds:
            with rasterio.open(path) as ds:
                print(ds.profile)
                # this reprojection does not change the shape, so we can compare
                assert ds.read().shape == ref_ds.read().shape
                assert ds.crs.to_epsg() == 3395

    def test_reduce(self):
        """Reduce dimension "t" with each reducer and check two stitched cells."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        cube = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        env = EvalEnv()

        # (reducer name, expected cells[0][0][0], expected cells[0][0][1],
        #  whether the second value is compared with assertAlmostEqual)
        cases = [
            ("max", 2.0, 2.0, False),
            ("min", 2.0, 1.0, False),
            ("sum", 2.0, 4.0, False),
            ("mean", 2.0, 1.3333333, True),
            ("variance", 0.0, 0.2222222, True),
            ("sd", 0.0, 0.4714045, True),
        ]
        for name, first, second, approximate in cases:
            stitched = cube.reduce_dimension(
                reducer=reducer(name), dimension="t", env=env
            ).pyramid.levels[0].stitch()
            print(stitched)
            self.assertEqual(first, stitched.cells[0][0][0], msg=name)
            if approximate:
                self.assertAlmostEqual(second, stitched.cells[0][0][1], msg=name)
            else:
                self.assertEqual(second, stitched.cells[0][0][1], msg=name)

    def test_reduce_all_data(self):
        """Reduce a single-pixel layer holding the values 1.0 and 5.0 on two dates."""
        pyramid = Pyramid({
            0: self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 5.0
            })
        })
        cube = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        env = EvalEnv()

        def reduced_pixel(name):
            stitched = cube.reduce_dimension(
                reducer=reducer(name), dimension="t", env=env
            ).pyramid.levels[0].stitch()
            return stitched.cells[0][0][0]

        for name, expected in [("min", 1.0), ("max", 5.0), ("sum", 6.0)]:
            self.assertEqual(expected, reduced_pixel(name), msg=name)

        for name, expected in [("mean", 3.0), ("variance", 4.0), ("sd", 2.0)]:
            self.assertAlmostEqual(expected, reduced_pixel(name), delta=0.001, msg=name)

    def test_reduce_some_nodata(self):
        """Reduce a single-pixel layer where one of the two dates holds the no-data value."""
        no_data = -1.0

        pyramid = Pyramid({
            0: self._single_pixel_layer(
                {
                    datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): no_data,
                    datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 5.0
                },
                no_data)
        })
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        def reduced_pixel(name):
            stitched = imagecollection.reduce(name, dimension="t").pyramid.levels[0].stitch()
            return stitched.cells[0][0][0]

        for name, expected in [("min", 5.0), ("max", 5.0), ("sum", 5.0)]:
            self.assertEqual(expected, reduced_pixel(name), msg=name)

        for name, expected in [("mean", 5.0), ("variance", 0.0), ("sd", 0.0)]:
            self.assertAlmostEqual(expected, reduced_pixel(name), delta=0.001, msg=name)

    def test_reduce_tiles(self):
        """Smoke test: numpy std/var over two stacked single-pixel tiles (no assertions)."""
        print("======")
        tile1 = self._single_pixel_tile(1)
        tile2 = self._single_pixel_tile(5)

        cube = np.array([tile1.cells, tile2.cells])

        # "MIN", "MAX", "SUM", "MEAN", "VARIANCE"
        std = np.std(cube, axis=0)
        var = np.var(cube, axis=0)
        print(std)
        print(var)

    @staticmethod
    def _single_pixel_tile(value, no_data=-1.0):
        """Build a tile from the 1x1 array ``[[value]]`` with the given no-data value."""
        cells = np.array([[value]])
        return Tile.from_numpy_array(cells, no_data)

    def _single_pixel_layer(self, grid_value_by_datetime, no_data=-1.0):
        """Build a SPACETIME ``TiledRasterLayer`` with one single-pixel tile per timestamp.

        :param grid_value_by_datetime: dict mapping a datetime to the pixel value
            for that instant; entries are processed in sorted key order.
        :param no_data: no-data value used for the tiles and the layer cell type.
        :return: the layer created by ``TiledRasterLayer.from_numpy_rdd``.
        """
        from collections import OrderedDict

        sorted_by_datetime = OrderedDict(sorted(grid_value_by_datetime.items()))

        def elem(timestamp, value):
            tile = self._single_pixel_tile(value, no_data)
            # NOTE(review): each record is wrapped in a one-element list, exactly
            # as in the original; presumably the serializer relies on this shape,
            # so confirm before flattening.
            return [(SpaceTimeKey(0, 0, timestamp), tile)]

        layer = [elem(timestamp, value) for timestamp, value in sorted_by_datetime.items()]
        rdd = SparkContext.getOrCreate().parallelize(layer)

        datetimes = list(sorted_by_datetime.keys())

        extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 1.0, 'ymax': 1.0}
        layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 1, 'tileRows': 1}
        metadata = {
            'cellType': 'float32ud%f' % no_data,
            'extent': extent,
            'crs': '+proj=longlat +datum=WGS84 +no_defs ',
            'bounds': {
                'minKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(datetimes[0])},
                'maxKey': {'col': 0, 'row': 0, 'instant': _convert_to_unix_time(datetimes[-1])}
            },
            'layoutDefinition': {
                'extent': extent,
                'tileLayout': layout
            }
        }

        return TiledRasterLayer.from_numpy_rdd(LayerType.SPACETIME, rdd, metadata)

    def test_reduce_nontemporal(self):
        """Reducing over the unknown dimension "gender" raises FeatureUnsupportedException."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        with self.assertRaises(FeatureUnsupportedException) as context:
            imagecollection.reduce("max", dimension="gender").pyramid.levels[0].stitch()
        print(context.exception)

    def test_aggregate_temporal(self):
        """Tests deprecated process spec (flat interval list)! To be phased out."""
        interval_list = ["2017-01-01", "2018-01-01"]
        self._test_aggregate_temporal(interval_list)

    def _median_reducer(self):
        """Return the flat process graph of ``median`` applied to the ``data`` argument."""
        from openeo.processes import median
        builder = median({"from_argument": "data"})
        return builder.flat_graph()

    def test_aggregate_temporal_median(self):
        """Aggregate 2015-01-01..2018-01-01 with a median reducer; compare one cell with numpy."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        aggregated = imagecollection.aggregate_temporal(
            ["2015-01-01", "2018-01-01"], ["2017-01-03"], self._median_reducer(), dimension="t"
        )
        stitched = aggregated.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

        expected_median = np.median([self.tile.cells, self.tile2.cells, self.tile.cells], axis=0)
        # TODO nodata handling??
        assert_array_almost_equal(stitched.cells[0, 1:2, 1:2], expected_median[1:2, 1:2])

    def _test_aggregate_temporal(self, interval_list):
        """Aggregate with "min" over ``interval_list`` and compare the top-left 5x5 with numpy.

        :param interval_list: the intervals argument handed to ``aggregate_temporal``.
        """
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        aggregated = imagecollection.aggregate_temporal(
            interval_list, ["2017-01-03"], "min", dimension="t"
        )
        stitched = aggregated.pyramid.levels[0].to_spatial_layer().stitch()
        print(stitched)

        # The reducer is "min", so the reference is an element-wise minimum.
        expected_min = np.min([self.tile2.cells, self.tile.cells], axis=0)
        assert_array_almost_equal(stitched.cells[0, 0:5, 0:5], expected_min)

    def test_aggregate_temporal_100(self):
        """Same as ``test_aggregate_temporal`` but with the intervals as a nested list."""
        self._test_aggregate_temporal([["2017-01-01", "2018-01-01"]])

    def test_max_aggregator(self):
        """``max_composite`` of the two fixture tiles yields 2.0 at cell [0][0]."""
        tiles = [self.tile, self.tile2]
        composite = max_composite(tiles)
        self.assertEqual(2.0, composite.cells[0][0])

    def test_aggregate_max_time(self):
        """A "max" reduce over "t" keeps the FLOAT32 cell type and yields 2.0 at [0][0][0]."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        layer = imagecollection.reduce('max', dimension='t').pyramid.levels[0]
        stitched = layer.stitch()
        assert CellType.FLOAT32.value == layer.layer_metadata.cell_type
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])

    def test_min_time(self):
        """Check the "min"/"max" reductions over "t" via stitching and point timeseries."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        cube = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        env = EvalEnv()
        min_time = cube.reduce_dimension(reducer=reducer('min'), dimension='t', env=env)
        max_time = cube.reduce_dimension(reducer=reducer('max'), dimension='t', env=env)

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(cube.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])

    def test_apply_dimension_spatiotemporal(self):
        """Run a UDF that returns its input and compare every tile with the FLOAT64 input."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(
            pyramid=pyramid,
            metadata=GeopysparkCubeMetadata({
                "cube:dimensions": {
                    # TODO: also specify other dimensions?
                    "bands": {"type": "bands", "values": ["2"]}
                },
                "summaries": {
                    "eo:bands": [{
                        "name": "2",
                        "common_name": "blue",
                        "wavelength_nm": 496.6,
                        "res_m": 10,
                        "scale": 0.0001,
                        "offset": 0,
                        "type": "int16",
                        "unit": "1"
                    }]
                }
            })
        )

        # The UDF source must be valid Python on its own, so it starts at
        # column 0 inside the literal and keeps its own line breaks.
        udf_code = """
def rct_savitzky_golay(udf_data:UdfData):
    from scipy.signal import savgol_filter

    print(udf_data.get_datacube_list())
    return udf_data

"""

        result = imagecollection.apply_tiles_spatiotemporal(udf_code)
        local_tiles = result.pyramid.levels[0].to_numpy_rdd().collect()
        print(local_tiles)

        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(len(TestMultipleDates.layer), len(local_tiles))

        reference_layer = imagecollection.pyramid.levels[0].convert_data_type(CellType.FLOAT64)
        ref_dict = {e[0]: e[1] for e in reference_layer.to_numpy_rdd().collect()}
        result_dict = {e[0]: e[1] for e in local_tiles}

        for k, v in ref_dict.items():
            tile = result_dict[k]
            assert_array_almost_equal(np.squeeze(v.cells), np.squeeze(tile.cells), decimal=2)

    def test_mask_raster_replacement_default_none(self):
        """Masking without a replacement leaves NaN at [0][0][1] after a "max" reduce."""
        cube, mask_cube = self._cube_and_mask_cube()

        stitched = cube.mask(mask=mask_cube).reduce('max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert np.isnan(stitched.cells[0][0][1])

    def test_mask_raster_replacement_float(self):
        """Masking with ``replacement=10.0`` yields 10.0 at [0][0][1] after a "max" reduce."""
        cube, mask_cube = self._cube_and_mask_cube()

        stitched = cube.mask(mask=mask_cube, replacement=10.0).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_mask_raster_replacement_int(self):
        """Masking with the integer ``replacement=10`` yields 10.0 at [0][0][1]."""
        cube, mask_cube = self._cube_and_mask_cube()

        stitched = cube.mask(mask=mask_cube, replacement=10).reduce(
            'max', dimension="t").pyramid.levels[0].stitch()
        print(stitched)
        assert stitched.cells[0][0][0] == 2.0
        assert stitched.cells[0][0][1] == 10.0

    def test_apply_kernel_float(self):
        """Apply a float cross-shaped 3x3 kernel with factor 2.0 and check three cells."""
        kernel = np.array([[0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0]])

        pyramid = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel, 2.0).reduce('max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 12.0
        assert stitched.cells[0][0][1] == 16.0
        assert stitched.cells[0][1][1] == 20.0

    def test_apply_kernel_int(self):
        """Apply an integer cross-shaped 3x3 kernel with the default factor and check three cells."""
        kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

        pyramid = Pyramid({0: self.tiled_raster_rdd})
        img = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)
        stitched = img.apply_kernel(kernel).reduce('max', dimension="t").pyramid.levels[0].stitch()

        assert stitched.cells[0][0][0] == 6.0
        assert stitched.cells[0][0][1] == 8.0
        assert stitched.cells[0][1][1] == 10.0

    def test_resample_spatial(self):
        """Resample to resolution 0.05 and verify the resolution of the written GeoTIFF."""
        pyramid = Pyramid({0: self.tiled_raster_rdd})
        imagecollection = GeopysparkDataCube(pyramid=pyramid, metadata=self.collection_metadata)

        resampled = imagecollection.resample_spatial(resolution=0.05)

        path = str(self.temp_folder / "resampled.tiff")
        res = resampled.reduce('max', dimension="t")
        res.save_result(path, format="GTIFF")

        import rasterio
        with rasterio.open(path) as ds:
            print(ds.profile)
            self.assertAlmostEqual(0.05, ds.res[0], 3)

    def test_rename_dimension(self):
        """Renaming dimension 't' makes the new name a valid dimension of the result."""
        imagecollection = GeopysparkDataCube(
            pyramid=Pyramid({0: self.tiled_raster_rdd}),
            metadata=self.collection_metadata,
        )

        dim_renamed = imagecollection.rename_dimension('t', 'myNewTimeDim')

        dim_renamed.metadata.assert_valid_dimension('myNewTimeDim')
class BandSelectionTest(BaseTestClass):
    """Band selection and tile/cell mapping on three-band SPATIAL layers."""

    # Three constant 5x5 float bands holding 1.0, 2.0 and 3.0 respectively.
    band_1 = np.full((5, 5), 1.0)
    band_2 = np.full((5, 5), 2.0)
    band_3 = np.full((5, 5), 3.0)

    bands = np.array([band_1, band_2, band_3])

    layer = [
        (SpatialKey(0, 0), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(1, 0), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(0, 1), Tile(bands, 'FLOAT', -1.0)),
        (SpatialKey(1, 1), Tile(bands, 'FLOAT', -1.0)),
    ]

    rdd = BaseTestClass.pysc.parallelize(layer)

    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
    layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
    metadata = {
        'cellType': 'float32ud-1.0',
        'extent': extent,
        'crs': '+proj=longlat +datum=WGS84 +no_defs ',
        'bounds': {
            'minKey': {'col': 0, 'row': 0},
            'maxKey': {'col': 1, 'row': 1},
        },
        'layoutDefinition': {
            'extent': extent,
            'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2},
        },
    }

    tiled_raster_rdd = TiledRasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd, metadata, 5)

    layer2 = [
        (ProjectedExtent(Extent(0, 0, 1, 1), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(1, 0, 2, 1), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(0, 1, 1, 2), 3857), Tile(bands, 'FLOAT', -1.0)),
        (ProjectedExtent(Extent(1, 1, 2, 2), 3857), Tile(bands, 'FLOAT', -1.0)),
    ]
    rdd2 = BaseTestClass.pysc.parallelize(layer2)
    raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd2)

    @pytest.fixture(autouse=True)
    def tearDown(self):
        # Runs the test first (yield), then closes the Py4J gateway.
        yield
        BaseTestClass.pysc._gateway.close()

    @staticmethod
    def _first_tile(raster_layer):
        """Return the tile (second item) of the first record of ``raster_layer``."""
        first_record = raster_layer.to_numpy_rdd().first()
        return first_record[1]

    def test_bands_invalid(self):
        """Selecting bands with a string raises TypeError."""
        with pytest.raises(TypeError):
            self._first_tile(self.tiled_raster_rdd.bands("hello"))

    def test_bands_int_tiled(self):
        """``bands(1)`` on the tiled layer returns the second band."""
        selected = self._first_tile(self.tiled_raster_rdd.bands(1))
        wanted = np.array(self.band_2)

        self.assertTrue((wanted == selected.cells).all())

    def test_bands_int_raster(self):
        """``bands(1)`` on the raster layer returns the second band."""
        selected = self._first_tile(self.raster_rdd.bands(1))
        wanted = np.array(self.band_2)

        self.assertTrue((wanted == selected.cells).all())

    def test_bands_tuple_tiled(self):
        """``bands((1, 2))`` on the tiled layer returns bands two and three."""
        selected = self._first_tile(self.tiled_raster_rdd.bands((1, 2)))
        wanted = np.array([self.band_2, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_bands_tuple_raster(self):
        """``bands((1, 2))`` on the raster layer returns bands two and three."""
        selected = self._first_tile(self.raster_rdd.bands((1, 2)))
        wanted = np.array([self.band_2, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_bands_list_tiled(self):
        """``bands([0, 2])`` on the tiled layer returns bands one and three."""
        selected = self._first_tile(self.tiled_raster_rdd.bands([0, 2]))
        wanted = np.array([self.band_1, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_bands_list_raster(self):
        """``bands([0, 2])`` on the raster layer returns bands one and three."""
        selected = self._first_tile(self.raster_rdd.bands([0, 2]))
        wanted = np.array([self.band_1, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_band_range_tiled(self):
        """``bands(range(0, 3))`` on the tiled layer returns all three bands."""
        selected = self._first_tile(self.tiled_raster_rdd.bands(range(0, 3)))
        wanted = np.array([self.band_1, self.band_2, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_band_range_raster(self):
        """``bands(range(0, 3))`` on the raster layer returns all three bands."""
        selected = self._first_tile(self.raster_rdd.bands(range(0, 3)))
        wanted = np.array([self.band_1, self.band_2, self.band_3])

        self.assertTrue((wanted == selected.cells).all())

    def test_map_tiles_func_tiled(self):
        """``map_tiles`` with a named function computing (band0 + band1) / band2."""
        def combine_bands(tile):
            data = tile.cells
            return Tile((data[0] + data[1]) / data[2], tile.cell_type, tile.no_data_value)

        mapped = self.tiled_raster_rdd.map_tiles(combine_bands)
        selected = self._first_tile(mapped)
        wanted = np.array([self.band_1])

        self.assertTrue((wanted == selected.cells).all())

    def test_map_tiles_lambda_tiled(self):
        """``map_tiles`` with a lambda keeping only the first band; zoom level is preserved."""
        mapped_layer = self.tiled_raster_rdd.map_tiles(
            lambda tile: Tile(tile.cells[0], tile.cell_type, tile.no_data_value))
        selected = self._first_tile(mapped_layer)
        wanted = np.array([self.band_1])

        self.assertEqual(mapped_layer.zoom_level, self.tiled_raster_rdd.zoom_level)
        self.assertTrue((wanted == selected.cells).all())

    def test_map_cells_func_raster(self):
        """``map_cells`` on the raster layer replacing cells >= 3.0 with the no-data value."""
        def blank_high_cells(cells, nd):
            cells[cells >= 3.0] = nd
            return cells

        selected = self._first_tile(self.raster_rdd.map_cells(blank_high_cells))

        negative_band = np.full((5, 5), -1.0)
        wanted = np.array([self.band_1, self.band_2, negative_band])

        self.assertTrue((wanted == selected.cells).all())

    def test_map_cells_lambda_raster(self):
        """``map_cells`` on the raster layer adding the no-data value to every cell."""
        shifted = self.raster_rdd.map_cells(lambda cells, nd: cells + nd)
        selected = self._first_tile(shifted)

        self.assertTrue((0.0 == selected.cells[0, :]).all())
        self.assertTrue((self.band_1 == selected.cells[1, :]).all())
        self.assertTrue((self.band_2 == selected.cells[2, :]).all())

    def test_map_cells_func_tiled(self):
        """``map_cells`` on the tiled layer replacing cells >= 3.0 with the no-data value."""
        def blank_high_cells(cells, nd):
            cells[cells >= 3.0] = nd
            return cells

        selected = self._first_tile(self.tiled_raster_rdd.map_cells(blank_high_cells))

        negative_band = np.full((5, 5), -1.0)
        wanted = np.array([self.band_1, self.band_2, negative_band])

        self.assertTrue((wanted == selected.cells).all())

    def test_map_cells_lambda_tiled(self):
        """``map_cells`` on the tiled layer adding the no-data value; zoom level is preserved."""
        mapped_layer = self.tiled_raster_rdd.map_cells(lambda cells, nd: cells + nd)
        selected = self._first_tile(mapped_layer)

        self.assertTrue((0.0 == selected.cells[0, :]).all())
        self.assertTrue((self.band_1 == selected.cells[1, :]).all())
        self.assertTrue((self.band_2 == selected.cells[2, :]).all())
        self.assertEqual(mapped_layer.zoom_level, self.tiled_raster_rdd.zoom_level)
def test_tile_to_metadata_layout(self):
    """Tiling ``self.layer`` to the layout given by ``self.metadata`` reproduces that metadata."""
    tiled = self.layer.tile_to_layout(layout=self.metadata)

    # The tiled layer's extent must be Extent(0, 0, 10, 6).
    actual_extent = tiled.layer_metadata.extent
    expected_extent = Extent(0, 0, 10, 6)
    self.assertEqual(actual_extent, expected_extent)

    # The full metadata must equal the metadata that was used as the layout.
    actual_dict = tiled.layer_metadata.to_dict()
    expected_dict = self.metadata.to_dict()
    self.assertDictEqual(actual_dict, expected_dict)