def test_circle_rdd(self): object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) circle_rdd = CircleRDD(object_rdd, 0.1) collected_data = circle_rdd.getRawSpatialRDD().collect() print([geo_data.geom.wkt for geo_data in collected_data])
def test_distance_join_query(self): object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.analyze() object_rdd.spatialPartitioning(GridType.QUADTREE) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) for i in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, False, True).count()
def test_distance_join_query(self): object_rdd = PointRDD(self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.spatialPartitioning(GridType.QUADTREE) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) object_rdd.spatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY) assert object_rdd.spatialPartitionedRDD.is_cached query_window_rdd.spatialPartitionedRDD.persist( StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, False, True).count()
def test_circle_rdd(self): spatial_rdd = PointRDD( self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY ) circle_rdd = CircleRDD(spatial_rdd, 0.5) circle_rdd.analyze() assert circle_rdd.approximateTotalCount == 3000 assert circle_rdd.rawSpatialRDD.take(1)[0].getUserData() == "testattribute0\ttestattribute1\ttestattribute2" assert circle_rdd.rawSpatialRDD.take(1)[0].geom.radius == 0.5
def test_distance_join_result_to_dataframe(self): point_csv_df = self.spark.\ read.\ format("csv").\ option("delimiter", ",").\ option("header", "false").load( area_lm_point_input_location ) point_csv_df.createOrReplaceTempView("pointtable") point_df = self.spark.sql( "select ST_Point(cast(pointtable._c0 as Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable" ) point_rdd = Adapter.toSpatialRdd(point_df, "arealandmark") point_rdd.analyze() polygon_wkt_df = self.spark.read.\ format("csv").\ option("delimiter", "\t").\ option("header", "false").load( mixed_wkt_geometry_input_location ) polygon_wkt_df.createOrReplaceTempView("polygontable") polygon_df = self.spark.\ sql("select ST_GeomFromWKT(polygontable._c0) as usacounty from polygontable") polygon_rdd = Adapter.toSpatialRdd(polygon_df, "usacounty") polygon_rdd.analyze() circle_rdd = CircleRDD(polygon_rdd, 0.2) point_rdd.spatialPartitioning(GridType.QUADTREE) circle_rdd.spatialPartitioning(point_rdd.getPartitioner()) point_rdd.buildIndex(IndexType.QUADTREE, True) join_result_pair_rdd = JoinQuery.\ DistanceJoinQueryFlat(point_rdd, circle_rdd, True, True) join_result_df = Adapter.toDf(join_result_pair_rdd, self.spark) join_result_df.printSchema() join_result_df.show()
def test_polygon_distance_join_with_crs_transformation(self): query_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3857") window_rdd = CircleRDD(query_rdd, 0.1) object_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3857") object_rdd.rawJvmSpatialRDD.jsrdd.repartition(4) object_rdd.spatialPartitioning(GridType.RTREE) object_rdd.buildIndex(IndexType.RTREE, True) window_rdd.spatialPartitioning(object_rdd.grids) results = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, False).collect() assert results.__len__() == 5467 for data in results: for polygon_data in data[1]: assert Circle(data[0].geom, 0.1).covers(polygon_data.geom)
def test_indexed_rdd_assignment(self): object_rdd = PointRDD(self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.analyze() object_rdd.spatialPartitioning(GridType.QUADTREE) object_rdd.buildIndex(IndexType.QUADTREE, True) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) object_rdd.buildIndex(IndexType.RTREE, False) object_rdd.indexedRDD.persist(StorageLevel.MEMORY_ONLY) query_window_rdd.jvmSpatialPartitionedRDD.persist( StorageLevel.MEMORY_ONLY) query_window_rdd.jvmSpatialPartitionedRDD.count() object_rdd.indexedRDD.count() import time start = time.time() for _ in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count() diff = time.time() - start object_rdd = PointRDD(self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.analyze() object_rdd.spatialPartitioning(GridType.QUADTREE) object_rdd.buildIndex(IndexType.QUADTREE, True) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) object_rdd.buildIndex(IndexType.RTREE, False) start1 = time.time() for _ in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, True, True).count()
def test_outside_polygon_distance_join_correctness(self): center_geometry_rdd = PolygonRDD( self.sc.parallelize(self.test_polygon_window_set), StorageLevel.MEMORY_ONLY) window_rdd = CircleRDD(center_geometry_rdd, 0.1) object_rdd = PolygonRDD( self.sc.parallelize(self.test_outside_polygon_set), StorageLevel.MEMORY_ONLY) self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE) result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True, True).collect() assert 0 == result.__len__() result_no_index = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, False, True).collect() assert 0 == result_no_index.__len__()