class SpatialKeySchemaTest(BaseTestClass):
    expected_keys = {'col': 7, 'row': 3}

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.SpatialKeyWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(spatial_key_decoder, spatial_key_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = rdd.first()._asdict()

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_keys, expected_keys):
        self.assertDictEqual(actual_keys, expected_keys)

    def test_encoded_keys(self):
        actual_encoded = [spatial_key_encoder(x) for x in self.rdd.collect()]

        proto_spatial_key = keyMessages_pb2.ProtoSpatialKey()
        proto_spatial_key.col = 7
        proto_spatial_key.row = 3

        expected_encoded = proto_spatial_key.SerializeToString()

        self.assertEqual(actual_encoded[0], expected_encoded)

    def test_decoded_keys(self):
        self.assertDictEqual(self.collected, self.expected_keys)
class ByteTileSchemaTest(BaseTestClass):
    tiles = [
        Tile.from_numpy_array(np.int8([0, 0, 1, 1]).reshape(2, 2), -128),
        Tile.from_numpy_array(np.int8([1, 2, 3, 4]).reshape(2, 2), -128),
        Tile.from_numpy_array(np.int8([5, 6, 7, 8]).reshape(2, 2), -128)
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    tw = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.ByteArrayTileWrapper

    java_rdd = tw.testOut(sc)
    ser = ProtoBufSerializer(tile_decoder, tile_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = rdd.collect()

    def test_encoded_tiles(self):
        expected_encoded = [to_pb_tile(x) for x in self.collected]

        for actual, expected in zip(self.tiles, expected_encoded):
            cells = actual.cells
            rows, cols = cells.shape

            self.assertEqual(expected.cols, cols)
            self.assertEqual(expected.rows, rows)
            self.assertEqual(expected.cellType.nd, actual.no_data_value)
            self.assertEqual(expected.cellType.dataType,
                             mapped_data_types[actual.cell_type])

    def test_decoded_tiles(self):
        for actual, expected in zip(self.collected, self.tiles):
            self.assertTrue((actual.cells == expected.cells).all())
            self.assertTrue(actual.cells.dtype == expected.cells.dtype)
            self.assertEqual(actual.cells.shape, expected.cells.shape)
class TupleSchemaTest(BaseTestClass):
    extent = {
        'epsg': 2004,
        'extent': {'xmax': 1.0, 'xmin': 0.0, 'ymax': 1.0, 'ymin': 0.0},
        'proj4': None
    }

    arr = np.int8([0, 0, 1, 1]).reshape(2, 2)
    bands = [arr, arr, arr]
    multiband_tile = np.array(bands)
    multiband_dict = Tile(multiband_tile, 'BYTE', -128)

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.TupleWrapper

    java_rdd = ew.testOut(sc)

    decoder = create_partial_tuple_decoder(key_type="ProjectedExtent")
    encoder = create_partial_tuple_encoder(key_type="ProjectedExtent")

    ser = ProtoBufSerializer(decoder, encoder)
    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = rdd.collect()

    @pytest.mark.skipif(
        'TRAVIS' in os.environ,
        reason="Encoding using methods in Main causes issues on Travis")
    def test_encoded_tuples(self):
        proto_tuple = tupleMessages_pb2.ProtoTuple()

        self.extent['extent'] = Extent(**self.extent['extent'])
        proto_extent = to_pb_projected_extent(ProjectedExtent(**self.extent))
        proto_multiband = to_pb_multibandtile(self.multiband_dict)

        proto_tuple.projectedExtent.CopyFrom(proto_extent)
        proto_tuple.tiles.CopyFrom(proto_multiband)

        bs = proto_tuple.SerializeToString()
        expected_encoded = [self.ser.dumps(x) for x in self.collected]

        for expected in expected_encoded:
            self.assertEqual(bs, expected)

    def test_decoded_tuples(self):
        expected_tuples = [
            (self.extent, self.multiband_dict),
            (self.extent, self.multiband_dict),
            (self.extent, self.multiband_dict)
        ]

        for actual, expected in zip(self.collected, expected_tuples):
            (actual_extent, actual_tile) = actual
            (expected_extent, expected_tile) = expected

            self.assertTrue((actual_tile.cells == expected_tile.cells).all())
            self.assertDictEqual(actual_extent._asdict(), expected_extent)
class TemporalProjectedExtentSchemaTest(BaseTestClass):
    extents = [
        Extent(0.0, 0.0, 1.0, 1.0),
        Extent(1.0, 2.0, 3.0, 4.0),
        Extent(5.0, 6.0, 7.0, 8.0),
    ]

    time = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    expected_tpextents = [
        TemporalProjectedExtent(epsg=2004, extent=extents[0], instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[1], instant=time)._asdict(),
        TemporalProjectedExtent(epsg=2004, extent=extents[2], instant=time)._asdict()
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.TemporalProjectedExtentWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(temporal_projected_extent_decoder,
                             temporal_projected_extent_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = [tpex._asdict() for tpex in rdd.collect()]

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_tpe, expected_tpe):
        for actual, expected in zip(actual_tpe, expected_tpe):
            self.assertDictEqual(actual, expected)

    def test_encoded_tpextents(self):
        actual_encoded = [
            temporal_projected_extent_encoder(x) for x in self.rdd.collect()
        ]

        for x in range(0, len(self.expected_tpextents)):
            self.expected_tpextents[x]['extent'] = Extent(**self.expected_tpextents[x]['extent'])

        expected_encoded = [
            to_pb_temporal_projected_extent(TemporalProjectedExtent(**ex)).SerializeToString()
            for ex in self.expected_tpextents
        ]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_tpextents(self):
        self.result_checker(self.collected, self.expected_tpextents)
def get(uri, extensions=['.shp', '.SHP'], num_partitions=None, s3_client=DEFAULT_S3_CLIENT):
    """Creates an ``RDD[Feature]`` from Shapefile(s) that are located on the local
    file system, ``HDFS``, or ``S3``.

    The ``properties`` of the ``Feature``\s in the ``RDD`` will contain the attributes of
    their respective geometry in a ``dict``. All keys and values of each ``dict`` will be
    ``str``\s regardless of how the attribute is represented in the Shapefile.

    Note:
        This feature is currently experimental and will most likely change in the coming
        versions of GPS.

    Note:
        When reading from S3, the desired files **must** be publicly readable. Otherwise,
        you will get 403 errors. Due to the nature of how GPS reads Shapefile(s) from S3,
        the ``mock`` S3 Client cannot currently be used.

    Args:
        uri (str or [str]): The path or list of paths to the desired Shapefile(s)/directory(ies).
        extensions ([str], optional): A list of the extensions that the Shapefile(s) have.
            These are ``.shp`` and ``.SHP`` by default.
        num_partitions (int, optional): The number of partitions Spark will make when the
            ``RDD`` is created. If ``None``, then the ``defaultParallelism`` will be used.
        s3_client (str, optional): Which ``S3Client`` to use when reading Shapefiles from S3.
            There are currently two options: ``default`` and ``mock``. Defaults to
            :const:`~geopyspark.geotrellis.constants.DEFAULT_S3_CLIENT`.

            Note:
                ``mock`` should only be used in unit tests and debugging.

    Returns:
        ``RDD[:class:`~geopyspark.geotrellis.Feature`]``
    """

    pysc = get_spark_context()

    num_partitions = num_partitions or pysc.defaultParallelism
    shapefile = pysc._gateway.jvm.geopyspark.geotools.shapefile.ShapefileRDD

    if isinstance(uri, (list, tuple)):
        jrdd = shapefile.get(pysc._jsc.sc(), uri, extensions, num_partitions, s3_client)
    else:
        jrdd = shapefile.get(pysc._jsc.sc(), [uri], extensions, num_partitions, s3_client)

    ser = ProtoBufSerializer(feature_decoder, None)

    return create_python_rdd(jrdd, ser)
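# A minimal usage sketch (not part of the module above), assuming an active
# geopyspark SparkContext and that "/data/parcels/" is a directory of readable
# shapefiles; the path and partition count are illustrative only.
if __name__ == '__main__':
    features = get("/data/parcels/", num_partitions=8)

    first = features.first()
    print(first.geometry)    # Shapely geometry of the feature
    print(first.properties)  # dict mapping attribute names to str values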
class MultibandSchemaTest(BaseTestClass):
    arr = np.int8([0, 0, 1, 1]).reshape(2, 2)
    no_data = -128
    arr_dict = Tile(arr, 'BYTE', no_data)
    band_dicts = [arr_dict, arr_dict, arr_dict]

    bands = [arr, arr, arr]
    multiband_tile = np.array(bands)
    multiband_dict = Tile(multiband_tile, 'BYTE', no_data)

    sc = BaseTestClass.pysc._jsc.sc()
    mw = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.ArrayMultibandTileWrapper

    java_rdd = mw.testOut(sc)
    ser = ProtoBufSerializer(multibandtile_decoder, multibandtile_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = rdd.collect()

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_encoded_multibands(self):
        actual_encoded = [multibandtile_encoder(x) for x in self.collected]

        proto_tile = tileMessages_pb2.ProtoTile()
        cell_type = tileMessages_pb2.ProtoCellType()

        cell_type.nd = self.no_data
        cell_type.hasNoData = True
        cell_type.dataType = 1

        proto_tile.cols = 2
        proto_tile.rows = 2
        proto_tile.sint32Cells.extend(self.arr.flatten().tolist())
        proto_tile.cellType.CopyFrom(cell_type)

        proto_multiband = tileMessages_pb2.ProtoMultibandTile()
        proto_multiband.tiles.extend([proto_tile, proto_tile, proto_tile])
        bs = proto_multiband.SerializeToString()

        expected_encoded = [bs, bs, bs]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_multibands(self):
        expected_multibands = [
            self.multiband_dict,
            self.multiband_dict,
            self.multiband_dict
        ]

        for actual, expected in zip(self.collected, expected_multibands):
            self.assertTrue((actual.cells == expected.cells).all())
class FeatureCellValueSchemaTest(BaseTestClass):
    sc = BaseTestClass.pysc._jsc.sc()
    fw = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.FeatureCellValueWrapper

    java_rdd = fw.testOut(sc)
    ser = ProtoBufSerializer(feature_cellvalue_decoder, feature_cellvalue_encoder)
    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))

    point = Point(0, 2)

    line_1 = LineString([point, Point(1, 3), Point(2, 4), Point(3, 5), Point(4, 6)])
    line_2 = LineString([Point(5, 7), Point(6, 8), Point(7, 9), Point(8, 10), Point(9, 11)])

    multi_line = MultiLineString([line_1, line_2])

    features = [
        Feature(point, CellValue(2, 1)),
        Feature(line_1, CellValue(1, 0)),
        Feature(multi_line, CellValue(1, 0))
    ]

    collected = [f for f in rdd.collect()]

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_decoder(self):
        geoms = [g.geometry for g in self.collected]
        ms = [m.properties for m in self.collected]

        for x in self.features:
            self.assertTrue(x.geometry in geoms)
            self.assertTrue(x.properties in ms)

    def test_encoder(self):
        expected_encoded = [to_pb_feature_cellvalue(f).SerializeToString() for f in self.features]
        actual_encoded = [feature_cellvalue_encoder(f) for f in self.collected]

        for x in expected_encoded:
            self.assertTrue(x in actual_encoded)
class ExtentSchemaTest(BaseTestClass):
    ew = BaseTestClass.pysc._gateway.jvm.geopyspark.geotrellis.tests.schemas.ExtentWrapper

    java_rdd = ew.testOut(BaseTestClass.pysc._jsc.sc())
    ser = ProtoBufSerializer(extent_decoder, extent_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = rdd.collect()

    expected_extents = [
        {"xmin": 0.0, "ymin": 0.0, "xmax": 1.0, "ymax": 1.0},
        {"xmin": 1.0, "ymin": 2.0, "xmax": 3.0, "ymax": 4.0},
        {"xmin": 5.0, "ymin": 6.0, "xmax": 7.0, "ymax": 8.0}
    ]

    @pytest.fixture(scope='class', autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_result, expected_result):
        for actual, expected in zip(actual_result, expected_result):
            self.assertDictEqual(actual, expected)

    def test_decoded_extents(self):
        actual_decoded = [from_pb_extent(ex)._asdict() for ex in self.collected]

        self.result_checker(actual_decoded, self.expected_extents)

    def test_encoded_extents(self):
        expected_encoded = [
            to_pb_extent(Extent(**x)).SerializeToString() for x in self.expected_extents
        ]
        actual_encoded = [extent_encoder(x) for x in self.collected]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)
class ProjectedExtentSchemaTest(BaseTestClass):
    projected_extents = [
        {'epsg': 2004, 'extent': {'xmax': 1.0, 'xmin': 0.0, 'ymax': 1.0, 'ymin': 0.0}, 'proj4': None},
        {'epsg': 2004, 'extent': {'xmax': 3.0, 'xmin': 1.0, 'ymax': 4.0, 'ymin': 2.0}, 'proj4': None},
        {'epsg': 2004, 'extent': {'xmax': 7.0, 'xmin': 5.0, 'ymax': 8.0, 'ymin': 6.0}, 'proj4': None}
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.ProjectedExtentWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(projected_extent_decoder, projected_extent_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = [pex._asdict() for pex in rdd.collect()]

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_pe, expected_pe):
        for actual, expected in zip(actual_pe, expected_pe):
            self.assertDictEqual(actual, expected)

    def test_encoded_pextents(self):
        actual_encoded = [projected_extent_encoder(x) for x in self.rdd.collect()]

        for x in range(0, len(self.projected_extents)):
            self.projected_extents[x]['extent'] = Extent(**self.projected_extents[x]['extent'])

        expected_encoded = [
            to_pb_projected_extent(ProjectedExtent(**ex)).SerializeToString()
            for ex in self.projected_extents
        ]

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_pextents(self):
        self.result_checker(self.collected, self.projected_extents)
def rasterize_features(features,
                       crs,
                       zoom,
                       cell_type=CellType.FLOAT64,
                       options=None,
                       zindex_cell_type=CellType.INT8,
                       partition_strategy=None):
    """Rasterizes a collection of :class:`~geopyspark.vector_pipe.Feature`\s.

    Args:
        features (pyspark.RDD[Feature]): A Python ``RDD`` that contains
            :class:`~geopyspark.vector_pipe.Feature`\s.

            Note:
                The ``properties`` of each ``Feature`` must be an instance of
                :class:`~geopyspark.vector_pipe.CellValue`.
        crs (str or int): The CRS of the input geometry.
        zoom (int): The zoom level of the output raster.

            Note:
                Not all rasterized ``Feature``\s may be present in the resulting layer
                if the ``zoom`` is not high enough.
        cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data type the
            cells should be when created. Defaults to ``CellType.FLOAT64``.
        options (:class:`~geopyspark.geotrellis.RasterizerOptions`, optional): Pixel intersection
            options.
        zindex_cell_type (str or :class:`~geopyspark.geotrellis.constants.CellType`): Which data
            type the ``Z-Index`` cells are. Defaults to ``CellType.INT8``.
        partition_strategy (:class:`~geopyspark.HashPartitionStrategy` or
            :class:`~geopyspark.SpatialPartitionStrategy`, optional): Sets the ``Partitioner``
            for the resulting layer and how many partitions it has. Defaults to ``None``.

            If ``None``, then the output layer will have the default ``Partitioner`` and a
            number of partitions that was determined by the method.

            If ``partition_strategy`` is set but has no ``num_partitions``, then the resulting
            layer will have the ``Partitioner`` specified in the strategy with the same number
            of partitions the source layer had.

            If ``partition_strategy`` is set and has a ``num_partitions``, then the resulting
            layer will have the ``Partitioner`` and number of partitions specified in the
            strategy.

    Returns:
        :class:`~geopyspark.geotrellis.layer.TiledRasterLayer`
    """

    if isinstance(crs, int):
        crs = str(crs)

    pysc = get_spark_context()

    rasterizer = pysc._gateway.jvm.geopyspark.geotrellis.SpatialTiledRasterLayer.rasterizeFeaturesWithZIndex

    ser = ProtoBufSerializer(feature_cellvalue_decoder, feature_cellvalue_encoder)
    reserialized_rdd = features._reserialize(ser)

    srdd = rasterizer(reserialized_rdd._jrdd.rdd(),
                      crs,
                      zoom,
                      CellType(cell_type).value,
                      options,
                      CellType(zindex_cell_type).value,
                      partition_strategy)

    return TiledRasterLayer(LayerType.SPATIAL, srdd)
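# A minimal usage sketch (not part of the function above): rasterizing a single
# point feature. The CRS, zoom, and CellValue argument order (value, zindex) are
# assumptions for illustration, not taken from the surrounding source.
if __name__ == '__main__':
    from shapely.geometry import Point
    from geopyspark.vector_pipe import Feature, CellValue

    pysc = get_spark_context()

    # One point burned into the layer with cell value 1.0 and Z-index 0.
    cell_features = pysc.parallelize([Feature(Point(0.5, 0.5), CellValue(1.0, 0))])

    layer = rasterize_features(cell_features, crs=4326, zoom=12)
    print(layer)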
def _get_rdd(self, jrdd):
    ser = ProtoBufSerializer(feature_decoder, feature_encoder)

    return create_python_rdd(jrdd, ser)
class SpaceTimeKeySchemaTest(BaseTestClass):
    time = datetime.datetime.strptime("2016-08-24T09:00:00Z", '%Y-%m-%dT%H:%M:%SZ')

    expected_keys = [
        SpaceTimeKey(7, 3, time)._asdict(),
        SpaceTimeKey(9, 4, time)._asdict(),
        SpaceTimeKey(11, 5, time)._asdict(),
    ]

    sc = BaseTestClass.pysc._jsc.sc()
    ew = BaseTestClass.pysc._jvm.geopyspark.geotrellis.tests.schemas.SpaceTimeKeyWrapper

    java_rdd = ew.testOut(sc)
    ser = ProtoBufSerializer(space_time_key_decoder, space_time_key_encoder)

    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))
    collected = [stk._asdict() for stk in rdd.collect()]

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def result_checker(self, actual_keys, expected_keys):
        for actual, expected in zip(actual_keys, expected_keys):
            self.assertDictEqual(actual, expected)

    def test_encoded_keys(self):
        actual_encoded = [space_time_key_encoder(x) for x in self.rdd.collect()]

        expected_encoded = []

        for x in self.expected_keys:
            proto_space_time_key = keyMessages_pb2.ProtoSpaceTimeKey()

            proto_space_time_key.col = x['col']
            proto_space_time_key.row = x['row']
            proto_space_time_key.instant = _convert_to_unix_time(x['instant'])

            expected_encoded.append(proto_space_time_key.SerializeToString())

        for actual, expected in zip(actual_encoded, expected_encoded):
            self.assertEqual(actual, expected)

    def test_decoded_keys(self):
        self.result_checker(self.collected, self.expected_keys)
class FeatureSchemaTest(BaseTestClass):
    sc = BaseTestClass.pysc._jsc.sc()
    fw = BaseTestClass.pysc._jvm.geopyspark.vectorpipe.tests.schemas.FeatureWrapper

    java_rdd = fw.testOut(sc)
    ser = ProtoBufSerializer(feature_decoder, feature_encoder)
    rdd = RDD(java_rdd, BaseTestClass.pysc, AutoBatchedSerializer(ser))

    metadata = Properties(
        element_id=1993,
        user="******",
        uid=19144,
        changeset=10,
        version=24,
        minor_version=5,
        timestamp=parser.parse("2012-06-05T07:00:00UTC"),
        visible=True,
        tags={
            'amenity': 'embassy',
            'diplomatic': 'embassy',
            'country': 'azavea'
        })

    point = Point(0, 2)

    line_1 = LineString([point, Point(1, 3), Point(2, 4), Point(3, 5), Point(4, 6)])
    line_2 = LineString([Point(5, 7), Point(6, 8), Point(7, 9), Point(8, 10), Point(9, 11)])

    multi_line = MultiLineString([line_1, line_2])

    features = [
        Feature(point, metadata),
        Feature(line_1, metadata),
        Feature(multi_line, metadata)
    ]

    collected = [f for f in rdd.collect()]

    @pytest.fixture(autouse=True)
    def tearDown(self):
        yield
        BaseTestClass.pysc._gateway.close()

    def test_decoder(self):
        geoms = [g.geometry for g in self.collected]
        ms = [m.properties for m in self.collected]

        for x in self.features:
            self.assertTrue(x.geometry in geoms)
            self.assertTrue(x.properties in ms)

    def test_encoder(self):
        expected_encoded = [to_pb_feature(f).SerializeToString() for f in self.features]
        actual_encoded = [feature_encoder(f) for f in self.collected]

        for x in expected_encoded:
            self.assertTrue(x in actual_encoded)