def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
    """
    Create a LineStringRDD backed by whatever
    ``SpatialObjectLoaderAdapter.load_line_string_spatial_rdd`` returns for ``path``.

    :param sc: SparkContext whose ``_jvm`` and ``_jsc`` handles are used for the load
    :param path: str, handed unchanged to the adapter
    :return: LineStringRDD whose underlying JVM RDD was set from the adapter result
    """
    gateway = sc._jvm
    wrapper = LineStringRDD()
    adapter = SpatialObjectLoaderAdapter(gateway)
    java_rdd = adapter.load_line_string_spatial_rdd(sc._jsc, path)
    wrapper.set_srdd(java_rdd)
    return wrapper
def compare_count(self, spatial_rdd: LineStringRDD, envelope: Envelope, count: int):
    """
    Run ``analyze()`` on ``spatial_rdd`` and assert its statistics match the expectations.

    :param spatial_rdd: LineStringRDD to analyze and inspect
    :param envelope: Envelope expected to equal ``spatial_rdd.boundaryEnvelope``
    :param count: int expected to equal ``spatial_rdd.approximateTotalCount``
    """
    spatial_rdd.analyze()

    actual_count = spatial_rdd.approximateTotalCount
    assert count == actual_count

    actual_envelope = spatial_rdd.boundaryEnvelope
    assert envelope == actual_envelope
def test_spatial_knn_query_using_index(self):
    """
    Run a 5-nearest-neighbour query against an R-tree indexed LineStringRDD.

    The query is repeated ``self.loop_times`` times; every run must return at
    least one neighbour and the first neighbour must carry user data.
    """
    line_string_rdd = LineStringRDD(self.sc, input_location, splitter, True)
    line_string_rdd.buildIndex(IndexType.RTREE, False)

    for _ in range(self.loop_times):
        # The final argument is the use-index flag. It has to be True here,
        # otherwise the index built above is never consulted and this test
        # would not cover the indexed code path its name promises.
        result = KNNQuery.SpatialKnnQuery(line_string_rdd, self.query_point, 5, True)

        # A "> -1" bound can never fail; require a non-empty result, which the
        # result[0] access below relies on anyway.
        assert len(result) > 0
        assert result[0].getUserData() is not None
def test_saving_to_disc_index_point(self, remove_spatial_rdd_disc_dir):
    """
    Build an R-tree index over a LineStringRDD and save the indexed RDD as an
    object file named ``line_string_index`` under ``disc_object_location``.
    """
    from tests.properties.linestring_properties import input_location, splitter, num_partitions

    source_rdd = LineStringRDD(
        self.sc,
        input_location,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
    )
    source_rdd.buildIndex(IndexType.RTREE, False)

    indexed = source_rdd.indexedRawRDD
    target_path = os.path.join(disc_object_location, "line_string_index")
    indexed.saveAsObjectFile(target_path)
def test_linestring_rdd(self):
    """
    Load a CSV-backed LineStringRDD using offsets 0..7 and check that the WKT
    of the first collected geometry matches the expected line string.
    """
    expected_wkt = "LINESTRING (-112.506968 45.98186, -112.506968 45.983586, -112.504872 45.983586, -112.504872 45.98186)"

    rdd = LineStringRDD(
        sparkContext=self.sc,
        InputLocation=linestring_rdd_input_location,
        startOffset=0,
        endOffset=7,
        splitter=FileDataSplitter.CSV,
        carryInputData=True,
    )

    rows = rdd.getRawSpatialRDD().collect()
    first_row = rows[0]
    assert expected_wkt == first_row.geom.wkt
def test_spatial_range_query_using_index(self):
    """
    Run a range query against an R-tree indexed LineStringRDD.

    The query is repeated ``self.loop_times`` times and must match 999 records
    each time; afterwards the second of the first ten matches must carry user
    data.
    """
    spatial_rdd = LineStringRDD(self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY)
    spatial_rdd.buildIndex(IndexType.RTREE, False)

    # The final argument of SpatialRangeQuery is the use-index flag. It has to
    # be True here, otherwise the index built above is never consulted and the
    # test would only repeat the non-indexed scenario.
    for _ in range(self.loop_times):
        result_size = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True).count()
        assert result_size == 999

    sample = RangeQuery.SpatialRangeQuery(
        spatial_rdd, self.query_envelope, False, True).take(10)
    assert sample[1].getUserData() is not None
def test_mbr(self):
    """
    Derive the RDD returned by ``MinimumBoundingRectangle()`` from a
    LineStringRDD, collect it and print every collected element.
    """
    source_rdd = LineStringRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=StorageLevel.MEMORY_ONLY,
    )

    rectangles = source_rdd.MinimumBoundingRectangle().rawSpatialRDD.collect()
    for rectangle in rectangles:
        print(rectangle)

    # NOTE(review): a length is never negative, so this bound cannot fail; it
    # is kept unchanged here to preserve the test's current behaviour.
    assert len(rectangles) > -1
def readToLineStringRDD(cls, sc: SparkContext, inputPath: str) -> LineStringRDD:
    """
    Call the JVM ``ShapefileReader.readToLineStringRDD`` for ``inputPath`` and
    wrap the returned JVM RDD in a Python LineStringRDD.

    :param sc: SparkContext supplying the JVM gateway (``_jvm``) and Java context (``_jsc``)
    :param inputPath: str, passed unchanged to the JVM reader
    :return: LineStringRDD whose underlying JVM RDD is the reader's result
    """
    ShapefileReader.validate_imports()

    gateway, java_context = sc._jvm, sc._jsc
    java_rdd = gateway.ShapefileReader.readToLineStringRDD(java_context, inputPath)

    wrapper = LineStringRDD()
    wrapper.set_srdd(java_rdd)
    return wrapper
def test_saving_to_disc_spatial_rdd_linestring(self, remove_spatial_rdd_disc_dir):
    """
    Save the raw JVM RDD of a LineStringRDD as an object file named
    ``line_string`` under ``disc_object_location``.
    """
    from tests.properties.linestring_properties import input_location, splitter, num_partitions

    source_rdd = LineStringRDD(
        self.sc,
        input_location,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
    )

    raw_jvm_rdd = source_rdd.rawJvmSpatialRDD
    target_path = os.path.join(disc_object_location, "line_string")
    raw_jvm_rdd.saveAsObjectFile(target_path)
def test_build_index_without_set_grid(self):
    """
    Analyze a LineStringRDD and build an R-tree index on it without calling
    ``spatialPartitioning`` first; the test passes if no call raises.
    """
    constructor_kwargs = dict(
        sparkContext=self.sc,
        InputLocation=input_location,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    rdd = LineStringRDD(**constructor_kwargs)

    rdd.analyze()
    rdd.buildIndex(IndexType.RTREE, False)
def test_outside_line_string_join_correctness(self):
    """
    Join the "outside" line strings against the polygon windows and expect an
    empty result, first with the third join argument True and then False.
    """
    windows = PolygonRDD(
        self.sc.parallelize(self.test_polygon_window_set),
        StorageLevel.MEMORY_ONLY,
    )
    lines = LineStringRDD(
        self.sc.parallelize(self.test_outside_linestring_set),
        StorageLevel.MEMORY_ONLY,
    )
    self.prepare_rdd(lines, windows, GridType.QUADTREE)

    # Same order as before: third argument True first, then False.
    for third_argument in (True, False):
        matches = JoinQuery.SpatialJoinQuery(lines, windows, third_argument, False).collect()
        assert 0 == len(matches)
def test_overlapped_linestring_join_correctness(self):
    """
    Join the "overlapped" line strings against the polygon windows and pass
    each collected result to ``verify_join_result``, first with the third join
    argument True and then False.
    """
    windows = PolygonRDD(
        self.sc.parallelize(self.test_polygon_window_set),
        StorageLevel.MEMORY_ONLY,
    )
    lines = LineStringRDD(
        self.sc.parallelize(self.test_overlapped_linestring_set),
        StorageLevel.MEMORY_ONLY,
    )
    self.prepare_rdd(lines, windows, GridType.QUADTREE)

    # Same order as before: third argument True first, then False.
    for third_argument in (True, False):
        matches = JoinQuery.SpatialJoinQuery(lines, windows, third_argument, True).collect()
        self.verify_join_result(matches)
def test_voronoi_spatial_partitioning(self):
    """
    Partition a 10-partition LineStringRDD with ``GridType.VORONOI`` and print
    every entry of the resulting ``grids``.
    """
    rdd = LineStringRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        splitter=splitter,
        carryInputData=True,
        partitions=10,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    rdd.analyze()
    rdd.spatialPartitioning(GridType.VORONOI)

    for grid in rdd.grids:
        print(grid)
def test_geo_data_convert_polygon_rdd(self):
    """
    Round-trip GeoData line strings through LineStringRDD and check that the
    collected records equal the inputs.

    Note: despite "polygon" in the test name, both the geometries and the RDD
    used here are line strings.
    """
    linestring = LineString([(0.0, 1.0), (1, 1), (12.0, 1.0)])
    wkt = 'LINESTRING (-71.160281 42.258729, -71.160837 42.259113, -71.161144 42.25932)'
    linestring2 = loads(wkt)

    linestrings = [
        GeoData(geom=linestring, userData="a"),
        GeoData(geom=linestring2, userData="b"),
    ]

    rdd_data = self.sc.parallelize(linestrings)
    linestring_rdd = LineStringRDD(rdd_data)
    collected_data = linestring_rdd.rawSpatialRDD.collect()

    # Sort by userData so the comparison does not depend on collection order;
    # the inputs above are already listed in that order ("a", "b").
    sorted_collected_data = sorted(collected_data, key=lambda x: x.userData)

    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would let the element-wise comparison pass vacuously.
    assert len(sorted_collected_data) == len(linestrings)
    assert all(
        geo1 == geo2
        for geo1, geo2 in zip(linestrings, sorted_collected_data)
    )
def create_linestring_rdd(self, location, splitter, num_partitions):
    """
    Load a LineStringRDD from ``location`` and re-wrap its raw JVM RDD in a new
    LineStringRDD constructed with ``StorageLevel.MEMORY_ONLY``.

    :param location: input location passed to the LineStringRDD constructor
    :param splitter: splitter passed to the LineStringRDD constructor
    :param num_partitions: partition count passed to the LineStringRDD constructor
    :return: LineStringRDD built from the loaded RDD's ``rawJvmSpatialRDD``
    """
    loaded = LineStringRDD(self.sc, location, splitter, True, num_partitions)
    raw_jvm_rdd = loaded.rawJvmSpatialRDD
    return LineStringRDD(raw_jvm_rdd, StorageLevel.MEMORY_ONLY)
def test_constructor(self):
    """
    Build LineStringRDD through each constructor form listed below and check,
    via ``compare_count``, the analyzed record count and boundary envelope of
    every instance.
    """
    memory_only = StorageLevel.MEMORY_ONLY
    source_crs, target_crs = "epsg:4326", "epsg:5070"

    def check(rdd, expected_envelope):
        # Every variant is expected to report the same record count.
        self.compare_count(rdd, expected_envelope, input_count)

    # Keyword-argument form.
    core_rdd = LineStringRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=memory_only,
    )
    check(core_rdd, input_boundary)

    # No-argument form; only constructed, not inspected.
    rdd = LineStringRDD()

    # Positional form; this instance supplies rawJvmSpatialRDD further down.
    core_rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, memory_only)
    check(core_rdd, input_boundary)

    # From an existing JVM RDD, without and with a CRS pair.
    rdd = LineStringRDD(core_rdd.rawJvmSpatialRDD)
    check(rdd, input_boundary)

    rdd = LineStringRDD(core_rdd.rawJvmSpatialRDD, source_crs, target_crs)
    check(rdd, transformed_envelope)

    # From file, without a storage level.
    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions)
    check(rdd, input_boundary_2)

    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True)
    check(rdd, input_boundary_2)

    rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions)
    check(rdd, input_boundary)

    rdd = LineStringRDD(self.sc, input_location, splitter, True)
    check(rdd, input_boundary)

    # With an explicit storage level.
    rdd = LineStringRDD(core_rdd.rawJvmSpatialRDD, memory_only)
    check(rdd, input_boundary)

    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions, memory_only)
    check(rdd, input_boundary_2)

    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, memory_only)
    check(rdd, input_boundary_2)

    rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions, memory_only)
    check(rdd, input_boundary)

    rdd = LineStringRDD(self.sc, input_location, splitter, True, memory_only)
    check(rdd, input_boundary)

    # With an explicit storage level and a CRS pair.
    rdd = LineStringRDD(core_rdd.rawJvmSpatialRDD, memory_only, source_crs, target_crs)
    check(rdd, transformed_envelope)

    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True, num_partitions,
                        memory_only, source_crs, target_crs)
    check(rdd, transformed_envelope_2)

    rdd = LineStringRDD(self.sc, input_location, 0, 3, splitter, True,
                        memory_only, source_crs, target_crs)
    check(rdd, transformed_envelope_2)

    rdd = LineStringRDD(self.sc, input_location, splitter, True, num_partitions,
                        memory_only, source_crs, target_crs)
    check(rdd, transformed_envelope)

    rdd = LineStringRDD(self.sc, input_location, splitter, True,
                        memory_only, source_crs, target_crs)
    check(rdd, transformed_envelope)
def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
    """
    Create a LineStringRDD backed by whatever the JVM
    ``ObjectSpatialRDDLoader.loadLineStringSpatialRDD`` returns for ``path``.

    :param sc: SparkContext whose ``_jvm`` and ``_jsc`` handles are used for the load
    :param path: str, handed unchanged to the JVM loader
    :return: LineStringRDD whose underlying JVM RDD was set from the loader result
    """
    gateway = sc._jvm
    wrapper = LineStringRDD()
    java_loader = gateway.ObjectSpatialRDDLoader
    java_rdd = java_loader.loadLineStringSpatialRDD(sc._jsc, path)
    wrapper.set_srdd(java_rdd)
    return wrapper
def test_empty_constructor(self):
    """
    Create a LineStringRDD with the no-argument constructor, assign it the raw
    JVM RDD of a partitioned and indexed LineStringRDD, and call ``analyze()``
    on it; the test passes if no call raises.
    """
    populated = LineStringRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    populated.analyze()
    populated.spatialPartitioning(grid_type)
    populated.buildIndex(IndexType.RTREE, True)

    empty = LineStringRDD()
    empty.rawJvmSpatialRDD = populated.rawJvmSpatialRDD
    empty.analyze()
def getCenterLineStringRDDAsSpatialRDD(self) -> 'LineStringRDD':
    """
    Wrap the result of the JVM object's ``getCenterLineStringRDDAsSpatialRDD()``
    in a Python LineStringRDD.

    :return: LineStringRDD whose underlying JVM RDD is the value returned by
        the JVM call
    """
    # Imported here rather than at module level, as in the original code.
    from geospark.core.SpatialRDD import LineStringRDD

    # Call the JVM method matching this wrapper. The polygon variant
    # (getCenterPolygonAsSpatialRDD) was called here before, which handed a
    # polygon RDD to a LineStringRDD wrapper.
    srdd = self._srdd.getCenterLineStringRDDAsSpatialRDD()

    linestring_rdd = LineStringRDD()
    linestring_rdd.set_srdd(srdd)
    return linestring_rdd