def test_all_zeros(self):
    """A single-band tile of zeros must report ``(0.0, 0.0)`` as its min/max."""
    context = BaseTestClass.geopysc
    zero_tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': -500}
    records = [(self.projected_extent, zero_tile)]

    numpy_rdd = context.pysc.parallelize(records)
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    self.assertEqual((0.0, 0.0), layer.get_min_max())
def test_floating(self):
    """A float tile whose rows hold 0.0, 1.0, 1.5 and 2.0 has min/max ``(0.0, 2.0)``."""
    # One band, four rows; every cell in a row carries that row's value.
    row_values = (0.0, 1.0, 1.5, 2.0)
    cells = np.array([[[value] * 4 for value in row_values]], dtype=float)
    float_tile = {'data': cells, 'no_data_value': float('nan')}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, float_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    self.assertEqual((0.0, 2.0), layer.get_min_max())
def test_multibands(self):
    """Min/max is taken across all bands: bands filled with 1..4 give ``(1.0, 4.0)``."""
    # Four bands of shape (1, 4); band ``i`` is filled with the value ``i``.
    cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
    multiband_tile = {'data': cells, 'no_data_value': -500}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, multiband_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    self.assertEqual((1.0, 4.0), layer.get_min_max())
def test_collection_python_rdd(self):
    """Metadata collected with an explicit extent and layout echoes both back.

    The tile is read from ``self.dir_path`` with rasterio, wrapped in a
    ``RasterRDD``, and ``collect_metadata`` is asked to use ``self.extent``
    and ``self.layout``; the resulting metadata must carry those values.
    """
    # Read inside a ``with`` block so the dataset handle is closed once the
    # pixel data and no-data value have been copied out.
    with rasterio.open(self.dir_path) as data:
        tile_dict = {'data': data.read(), 'no_data_value': data.nodata}

    rasterio_rdd = self.geopysc.pysc.parallelize([(self.projected_extent, tile_dict)])
    raster_rdd = RasterRDD.from_numpy_rdd(self.geopysc, SPATIAL, rasterio_rdd)

    result = raster_rdd.collect_metadata(extent=self.extent, layout=self.layout)

    self.assertEqual(result.extent, self.extent)
    self.assertEqual(result.layout_definition.extent, self.extent)
    self.assertEqual(result.layout_definition.tileLayout, self.layout)
def test_all_zeros(self):
    """Reclassifying a zero tile with ``{0: 1}`` turns every cell into 1."""
    context = BaseTestClass.geopysc
    zero_tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': -500}

    numpy_rdd = context.pysc.parallelize([(self.projected_extent, zero_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    reclassified = layer.reclassify({0: 1}, int)
    # ``first()`` yields a (key, tile) pair; only the tile's cell data matters here.
    _, out_tile = reclassified.to_numpy_rdd().first()

    self.assertTrue((out_tile['data'] == 1).all())
def test_to_int(self):
    """Converting a float layer with ``convert_data_type(INT32)`` yields integer data.

    NOTE(review): the layer is converted to ``INT32`` but the assertion expects
    ``np.int64`` -- presumably the dtype the tile has once it is brought back
    into NumPy; confirm this is intended.
    """
    arr = np.array([[0.4324323432124, 0.0, 0.0],
                    [1.0, 1.0, 1.0]], dtype=float)
    tile = {'data': arr, 'no_data_value': float('nan')}

    # The test keys the tile with the fixture's ``self.projected_extent``; the
    # locally built extent/epsg values that used to live here were never used.
    rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

    converted = raster_rdd.convert_data_type(INT32)
    converted_arr = converted.to_numpy_rdd().first()[1]['data']

    self.assertEqual(converted_arr.dtype, np.int64)
def test_if_working(self):
    """Smoke test: building a PNG pyramid from a tiled zero layer must not raise.

    No assertion is made on the return value of ``PngRDD.makePyramid``; the
    test passes as long as the whole pipeline runs to completion.
    """
    arr = np.zeros((1, 16, 16))
    epsg_code = 3857
    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 10.0, 'ymax': 10.0}

    tile = {'data': arr, 'no_data_value': False}
    projected_extent = {'extent': extent, 'epsg': epsg_code}

    rdd = BaseTestClass.geopysc.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

    laid_out = raster_rdd.to_tiled_layer()

    # The result is intentionally discarded -- only successful execution is checked.
    PngRDD.makePyramid(laid_out, HOT)
def test_ignore_no_data_floats(self):
    """NaN no-data cells are replaced via ``replace_nodata_with`` during a float reclassify.

    The input is all ones with NaN on the diagonal. Mapping ``1.0 -> 0.0`` and
    replacing no-data with ``1.0`` must produce the 4x4 identity matrix.
    """
    cells = np.ones((1, 4, 4))
    np.fill_diagonal(cells[0], float('nan'))
    nan_tile = {'data': cells, 'no_data_value': float('nan')}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, nan_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    reclassified = layer.reclassify({1.0: 0.0}, float, replace_nodata_with=1.0)
    _, out_tile = reclassified.to_numpy_rdd().first()

    self.assertTrue((out_tile['data'] == np.identity(4)).all())
def test_ignore_no_data_ints(self):
    """Integer no-data cells are replaced via ``replace_nodata_with`` during an int reclassify.

    The input is all ones with ``NODATAINT`` on the diagonal. Mapping ``1 -> 0``
    and replacing no-data with ``1`` must produce the 16x16 integer identity.
    """
    cells = np.ones((1, 16, 16), int)
    np.fill_diagonal(cells[0], NODATAINT)
    int_tile = {'data': cells, 'no_data_value': NODATAINT}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, int_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    reclassified = layer.reclassify({1: 0}, int, replace_nodata_with=1)
    _, out_tile = reclassified.to_numpy_rdd().first()

    self.assertTrue((out_tile['data'] == np.identity(16, int)).all())
def test_persist(self):
    """``is_cached`` follows ``persist``/``unpersist``: False, then True, then False."""
    cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
    multiband_tile = {'data': cells, 'no_data_value': -500}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, multiband_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    # Freshly created layer: not cached yet.
    self.assertEqual(layer.is_cached, False)

    layer.persist(StorageLevel.MEMORY_ONLY)
    self.assertEqual(layer.is_cached, True)

    layer.unpersist()
    self.assertEqual(layer.is_cached, False)
def test_no_data_floats(self):
    """Reclassifying every ``0.0`` cell to NaN leaves a tile made up entirely of NaN."""
    cells = np.zeros((1, 4, 4), dtype=float)
    zero_tile = {'data': cells, 'no_data_value': float('nan')}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, zero_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    reclassified = layer.reclassify({0.0: float('nan')}, float)
    _, out_tile = reclassified.to_numpy_rdd().first()

    # NaN never compares equal to itself, so test each cell with ``isnan``.
    self.assertTrue(all(math.isnan(cell) for cell in out_tile['data'].flatten()))
def test_wrong_cols_and_rows(self):
    """``pyramid`` must raise ``ValueError`` for a layer laid out with 250-cell tiles.

    NOTE(review): presumably rejected because 250 is not a power of two --
    confirm against the ``pyramid`` implementation.
    """
    context = BaseTestClass.geopysc

    key = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}
    big_tile = {'data': np.zeros((1, 250, 250)), 'no_data_value': False}

    numpy_rdd = context.pysc.parallelize([(key, big_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    layer_metadata = layer.collect_metadata(tile_size=250)
    tiled = layer.tile_to_layout(layer_metadata)

    with pytest.raises(ValueError):
        tiled.pyramid(start_zoom=12, end_zoom=1)
def test_multibands(self):
    """Reclassify is applied to every band of a multiband tile.

    With the map ``{3: 10, 4: 20}`` the bands filled with 1, 2 and 3 are
    expected to become 10 and the band filled with 4 to become 20.
    """
    cells = np.array([[[band] * 4] for band in (1, 2, 3, 4)], dtype=int)
    multiband_tile = {'data': cells, 'no_data_value': -500}

    context = BaseTestClass.geopysc
    numpy_rdd = context.pysc.parallelize([(self.projected_extent, multiband_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    reclassified = layer.reclassify({3: 10, 4: 20}, int)
    _, out_tile = reclassified.to_numpy_rdd().first()

    wanted = np.array([[[value] * 4] for value in (10, 10, 10, 20)], dtype=int)
    self.assertTrue((out_tile['data'] == wanted).all())
def test_floating_voint_ranges(self):
    """``LESSTHAN`` reclassify maps cells below the break and leaves the rest as NaN.

    With the map ``{2.0: 5.0}`` the rows holding 0.0, 1.0 and 1.5 are expected
    to become 5.0, while the row holding exactly 2.0 is expected to be NaN.
    """
    arr = np.array([[[0.0, 0.0, 0.0, 0.0],
                     [1.0, 1.0, 1.0, 1.0],
                     [1.5, 1.5, 1.5, 1.5],
                     [2.0, 2.0, 2.0, 2.0]]], dtype=float)
    tile = {'data': arr, 'no_data_value': float('nan')}

    rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

    value_map = {2.0: 5.0}
    result = raster_rdd.reclassify(
        value_map, float, LESSTHAN).to_numpy_rdd().first()[1]['data']

    # Compare all three rows below the break. The previous check indexed only
    # row 2 and relied on broadcasting, so rows 0 and 1 were never verified.
    expected = np.full((3, 4), 5.0)
    self.assertTrue((result[0, :3] == expected).all())

    # NaN never equals itself, so the last row needs an explicit isnan check.
    self.assertTrue(np.isnan(result[0, 3]).all())
def test_correct_base(self):
    """A layer tiled to a 32x32 layout of 16x16 tiles can be pyramided from zoom 5 to 1.

    The resulting pyramid is handed to ``self.pyramid_building_check`` for validation.
    """
    context = BaseTestClass.geopysc

    key = {'extent': Extent(0.0, 0.0, 10.0, 10.0), 'epsg': 3857}
    zero_tile = {'data': np.zeros((1, 16, 16)), 'no_data_value': False}

    numpy_rdd = context.pysc.parallelize([(key, zero_tile)])
    layer = RasterRDD.from_numpy_rdd(context, SPATIAL, numpy_rdd)

    layout = TileLayout(32, 32, 16, 16)
    target_extent = Extent(-20037508.342789244, -20037508.342789244,
                           20037508.342789244, 20037508.342789244)

    layer_metadata = layer.collect_metadata(extent=target_extent, layout=layout)
    tiled = layer.tile_to_layout(layer_metadata)

    pyramided = tiled.pyramid(start_zoom=5, end_zoom=1)
    self.pyramid_building_check(pyramided)
def get(geopysc, rdd_type, uri, options=None, **kwargs):
    """Creates a ``RasterRDD`` from GeoTiffs that are located on the local file system, ``HDFS``,
    or ``S3``.

    Args:
        geopysc (geopyspark.GeoPyContext): The ``GeoPyContext`` being used this session.
        rdd_type (str): What the spatial type of the geotiffs are. This is
            represented by the constants: ``SPATIAL`` and ``SPACETIME``.

            Note:
                All of the GeoTiffs must have the same spatial type.
        uri (str or [str]): The path to a given file/directory. A list of paths
            is passed through as is; a single path is wrapped in a list.
        options (dict, optional): A dictionary of different options that are used
            when creating the RDD. This defaults to ``None``. If ``None``, then the
            RDD will be created using the default options for the given backend in
            GeoTrellis.

            Note:
                Key values in the ``dict`` should be in camel case, as this is the
                style that is used in Scala.

            These are the options when using the local file system or ``HDFS``:
                * **crs** (str, optional): The CRS that the output tiles should be
                  in. The CRS must be in the well-known name format. If ``None``,
                  then the CRS that the tiles were originally in will be used.
                * **timeTag** (str, optional): The name of the tiff tag that contains
                  the time stamp for the tile. If ``None``, then the default value
                  is: ``TIFFTAG_DATETIME``.
                * **timeFormat** (str, optional): The pattern of the time stamp for
                  java.time.format.DateTimeFormatter to parse. If ``None``,
                  then the default value is: ``yyyy:MM:dd HH:mm:ss``.
                * **maxTileSize** (int, optional): The max size of each tile in the
                  resulting RDD. If the size is smaller than a read in tile,
                  then that tile will be broken into tiles of the specified
                  size. If ``None``, then the whole tile will be read in.
                * **numPartitions** (int, optional): The number of repartitions Spark
                  will make when the data is repartitioned. If ``None``, then the
                  data will not be repartitioned.
                * **chunkSize** (int, optional): How many bytes of the file should be
                  read in at a time. If ``None``, then files will be read in 65536
                  byte chunks.

            ``S3`` has the above options in addition to this:
                * **s3Client** (str, optional): Which ``S3Client`` to use when reading
                  GeoTiffs. There are currently two options: ``default`` and
                  ``mock``. If ``None``, ``default`` is used.

                  Note:
                      ``mock`` should only be used in unit tests and debugging.

        **kwargs: Option parameters can also be entered as keyword arguments.

    Note:
        Defining both ``options`` and ``kwargs`` will cause the ``kwargs`` to be ignored in favor
        of ``options``.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.RasterRDD`
    """

    geotiff_rdd = geopysc._jvm.geopyspark.geotrellis.io.geotiff.GeoTiffRDD
    key = geopysc.map_key_input(rdd_type, False)

    # ``options`` takes precedence over ``kwargs``; when neither carries any
    # entries an empty dict is passed to the Scala reader.
    reader_options = options or kwargs or {}

    # The Scala side is always called with a list of paths.
    uris = uri if isinstance(uri, list) else [uri]

    srdd = geotiff_rdd.get(geopysc.sc, key, uris, reader_options)

    return RasterRDD(geopysc, rdd_type, srdd)