def read_singleband_geotrellis(self, options=None):
    """Read the GeoTiff at ``self.dir_path`` and return its tiles.

    Args:
        options: When ``None`` the raster is read with default settings.
            Any other value triggers a read with ``maxTileSize=256``; the
            value itself is not forwarded to ``get``.

    Returns:
        list: The second element of every record collected from the
        raster's numpy RDD.
    """
    # Only the presence of ``options`` matters: it toggles the tile-size cap.
    read_kwargs = {} if options is None else {'maxTileSize': 256}
    raster = get(BaseTestClass.geopysc, SPATIAL, self.dir_path, **read_kwargs)

    records = raster.to_numpy_rdd().collect()
    return [record[1] for record in records]
def read_multiband_geotrellis(self, opt=options):
    """Upload the fixture data, read it back with ``get`` and return the tiles.

    Args:
        opt: Forwarded to ``get`` as its fourth positional argument.
            Defaults to the ``options`` name that is in scope when this
            function is defined.

    Returns:
        list: The second element of every record collected from the
        raster's numpy RDD.
    """
    # Store ``self.data`` under ``self.bucket``/``self.key`` first; presumably
    # ``self.uri`` points at that object (verify against the fixture setup).
    self.client.putObject(self.bucket, self.key, self.data)

    numpy_rdd = get(BaseTestClass.geopysc, SPATIAL, self.uri, opt).to_numpy_rdd()
    return [record[1] for record in numpy_rdd.collect()]
class BaseTestClass(unittest.TestCase):
    """Shared fixture state for the test cases built on top of this class.

    Everything below runs once, when the class body is executed: a
    ``GeoPyContext`` is created, ``all-ones.tif`` is read, and the first
    record of the resulting numpy RDD is unpacked into class attributes.
    """

    # The Spark master string depends on whether the TRAVIS environment
    # variable is present.
    master_str = "local[2]" if 'TRAVIS' in os.environ else "local[*]"
    geopysc = GeoPyContext(master=master_str, appName="test")

    dir_path = geotiff_test_path("all-ones.tif")
    rdd = get(geopysc, SPATIAL, dir_path)

    # First collected record: index 0 is the key, index 1 the tile dict.
    value = rdd.to_numpy_rdd().collect()[0]

    projected_extent = value[0]
    extent = projected_extent.extent
    expected_tile = value[1]['data']

    # The tile shape is unpacked as three values; only the last two are used,
    # and TileLayout receives them as (cols, rows).
    (_, rows, cols) = expected_tile.shape
    layout = TileLayout(1, 1, cols, rows)
class CatalogTest(BaseTestClass):
    """Tests for reading and querying the ``catalog-test`` layer.

    The class body reads ``srtm_52_11.tif``, tiles and reprojects it to
    EPSG:3857 and builds a pyramid with ``start_zoom=11, end_zoom=1``; the
    tests compare what is read from the catalog at ``uri`` against it.
    """

    rdd = get(BaseTestClass.geopysc, SPATIAL, geotiff_test_path("srtm_52_11.tif"))

    metadata = rdd.collect_metadata()
    laid_out = rdd.tile_to_layout(metadata)
    reprojected = laid_out.reproject(target_crs="EPSG:3857", scheme=ZOOM)
    result = reprojected.pyramid(start_zoom=11, end_zoom=1)

    dir_path = geotiff_test_path("catalog/")
    uri = "file://{}".format(dir_path)
    layer_name = "catalog-test"

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        """Close the shared context's gateway when the fixture is torn down."""
        yield
        BaseTestClass.geopysc.pysc._gateway.close()

    @staticmethod
    def _query_box():
        """Return the small box geometry shared by the ``query`` tests."""
        return box(8348915.46680623, 543988.943201519, 8348915.4669, 543988.943201520)

    def test_read(self):
        """Each zoom level read from the catalog matches the expected pyramid."""
        # ``result`` is indexed from zoom 11 downwards: result[0] <-> zoom 11.
        for index, zoom in enumerate(range(11, 0, -1)):
            actual_layer = read(BaseTestClass.geopysc, SPATIAL, self.uri,
                                self.layer_name, zoom)
            expected_layer = self.result[index]

            self.assertDictEqual(actual_layer.layer_metadata.to_dict(),
                                 expected_layer.layer_metadata.to_dict())

    def test_read_value(self):
        """Reading a single existing tile yields a (1, 256, 256) array."""
        tiled = read_value(BaseTestClass.geopysc, SPATIAL, self.uri,
                           self.layer_name, 11, 1450, 966)

        self.assertEqual(tiled['data'].shape, (1, 256, 256))

    def test_bad_read_value(self):
        """Reading at col 1450, row 2000 of zoom 11 is expected to give ``None``."""
        tiled = read_value(BaseTestClass.geopysc, SPATIAL, self.uri,
                           self.layer_name, 11, 1450, 2000)

        # assertIsNone reports the unexpected value on failure.
        self.assertIsNone(tiled)

    def test_query(self):
        """A query with the shared box returns the expected spatial key first."""
        queried = query(BaseTestClass.geopysc, SPATIAL, self.uri,
                        self.layer_name, 11, self._query_box())

        self.assertEqual(queried.to_numpy_rdd().first()[0], SpatialKey(1450, 996))

    def test_query_partitions(self):
        """Same as ``test_query`` but passing ``numPartitions=2``."""
        queried = query(BaseTestClass.geopysc, SPATIAL, self.uri,
                        self.layer_name, 11, self._query_box(), numPartitions=2)

        self.assertEqual(queried.to_numpy_rdd().first()[0], SpatialKey(1450, 996))

    def test_query_crs(self):
        """Same as ``test_query`` but passing ``proj_query=3857``."""
        queried = query(BaseTestClass.geopysc, SPATIAL, self.uri,
                        self.layer_name, 11, self._query_box(), proj_query=3857)

        self.assertEqual(queried.to_numpy_rdd().first()[0], SpatialKey(1450, 996))

    def test_read_metadata(self):
        """``read_layer_metadata`` agrees with the metadata of a read layer."""
        layer = read(BaseTestClass.geopysc, SPATIAL, self.uri, self.layer_name, 5)
        actual_metadata = layer.layer_metadata

        expected_metadata = read_layer_metadata(BaseTestClass.geopysc, SPATIAL,
                                                self.uri, self.layer_name, 5)

        self.assertEqual(actual_metadata.to_dict(), expected_metadata.to_dict())

    def test_layer_ids(self):
        """The catalog exposes eleven layer ids."""
        ids = get_layer_ids(BaseTestClass.geopysc, self.uri)

        # assertEqual shows the actual count when the expectation is not met.
        self.assertEqual(len(ids), 11)
# Example script: read a local raster with GeoPySpark and compute a polygonal
# sum for every feature of a GeoJSON file.

# os, json and csv are used below; importing them here keeps the script
# self-contained (harmless if the file already imports them elsewhere).
import csv
import json
import os

# set SPARK_HOME (done before the geopyspark imports, as in the original order)
os.environ["SPARK_HOME"] = r"/usr/lib/spark"

from geopyspark.geopycontext import GeoPyContext
from geopyspark.geotrellis.constants import SPATIAL
from geopyspark.geotrellis.geotiff_rdd import get
from shapely.geometry import shape

# Create the GeoPyContext
geopysc = GeoPyContext(appName="example", master="yarn")

# read in a raster saved locally-- a 5x5 degree tile
# of float biomass data in the central Amazon
raster_rdd = get(geopysc=geopysc,
                 rdd_type=SPATIAL,
                 uri='/tmp/tile__0_0.tif',
                 options={'numPartitions': 100})

tiled_rdd = raster_rdd.to_tiled_layer()

# load the admin2 level geometries that we'll use to summarize our data
# (the handle gets its own name so the loop variable ``f`` below does not
# shadow it)
with open('all_within_tile_0_0.geojson') as geojson_file:
    txt = json.load(geojson_file)

# The csv module expects files it writes to be opened with newline='' so that
# it controls the line endings itself.
with open('out.csv', 'w', newline='') as thefile:
    csvwriter = csv.writer(thefile)

    for f in txt['features']:
        geom = shape(f['geometry'])
        sum_val = tiled_rdd.polygonal_sum(geometry=geom, data_type=float)

        props = f['properties']
        # NOTE(review): nothing is written through ``csvwriter`` in the part
        # of the script visible here -- the row-writing step appears to be
        # missing; confirm against the full example.
class Multiband(GeoTiffIOTest, BaseTestClass):
    """Tests for raster RDD operations on the multiband GeoTiff fixtures.

    ``result`` is read once, when the class body is executed, from the
    ``one-month-tiles-multiband/`` test directory.
    """

    dir_path = geotiff_test_path("one-month-tiles-multiband/")
    result = get(BaseTestClass.geopysc, SPATIAL, dir_path)

    # NOTE(review): no ``scope`` is given, so pytest treats this as a
    # function-scoped fixture and the code after ``yield`` runs after each
    # test -- confirm that closing the shared gateway per test is intended.
    @pytest.fixture(autouse=True)
    def tearDown(self):
        """Close the shared context's gateway when the fixture is torn down."""
        yield
        BaseTestClass.geopysc.pysc._gateway.close()

    def test_to_numpy_rdd(self, option=None):
        """The first tile of the numpy RDD has shape (2, 512, 512)."""
        pyrdd = self.result.to_numpy_rdd()
        _, tile = pyrdd.first()

        self.assertEqual(tile['data'].shape, (2, 512, 512))

    def test_collect_metadata(self, options=None):
        """Collected metadata reports a WGS84 long/lat CRS."""
        md = self.result.collect_metadata()

        # assertIn prints both operands when the substring is missing.
        self.assertIn('+proj=longlat', md.crs)
        self.assertIn('+datum=WGS84', md.crs)

    def test_collect_metadata_crs_override(self, options=None):
        """Passing ``crs='EPSG:3857'`` yields a mercator CRS in the metadata."""
        md = self.result.collect_metadata(crs='EPSG:3857')

        self.assertIn('+proj=merc', md.crs)

    def test_cut_tiles(self, options=None):
        """Cutting with ``tile_size=100`` metadata produces more records."""
        md = self.result.collect_metadata(tile_size=100)
        tiles = self.result.cut_tiles(md)

        records_before = self.result.srdd.rdd().count()
        records_after = tiles.srdd.rdd().count()

        self.assertGreater(records_after, records_before)

    def test_reproject(self, options=None):
        """Reprojecting to EPSG:3857 yields a mercator CRS in the metadata."""
        tiles = self.result.reproject("EPSG:3857")
        md = tiles.collect_metadata()

        self.assertIn('+proj=merc', md.crs)

    def test_to_tiled_raster(self):
        """``to_tiled_layer`` and ``tile_to_layout`` give equal layer metadata."""
        md = self.result.collect_metadata()
        tiled = self.result.tile_to_layout(md)
        converted = self.result.to_tiled_layer()

        self.assertDictEqual(tiled.layer_metadata.to_dict(),
                             converted.layer_metadata.to_dict())

    def test_to_int(self):
        """Converting a float tile with ``INT32`` yields an ``np.int64`` array."""
        arr = np.array([[0.4324323432124, 0.0, 0.0],
                        [1.0, 1.0, 1.0]], dtype=float)
        tile = {'data': arr, 'no_data_value': float('nan')}

        # The record key is ``self.projected_extent``, exactly as before; the
        # unused hand-built extent locals were dropped.
        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

        converted = raster_rdd.convert_data_type(INT32)
        converted_arr = converted.to_numpy_rdd().first()[1]['data']

        # NOTE(review): an INT32 conversion is asserted to come back as int64;
        # presumably this reflects how tiles are deserialized -- confirm.
        self.assertEqual(converted_arr.dtype, np.int64)

    def test_to_boolraw(self):
        """Converting with ``BOOLRAW`` yields an ``np.uint8`` array."""
        converted = self.result.convert_data_type(BOOLRAW)
        arr = converted.to_numpy_rdd().first()[1]['data']

        self.assertEqual(arr.dtype, np.uint8)