def test_creating_point_rdd(self): point_rdd = PointRDD(self.spark._sc, point_path, 4, FileDataSplitter.WKT, True) point_rdd.analyze() cnt = point_rdd.countWithoutDuplicates() assert cnt == 12872, f"Point RDD should have 12872 but found {cnt}"
def test_empty_constructor_test(self): object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) object_rdd_copy = PointRDD() object_rdd_copy.rawJvmSpatialRDD = object_rdd.rawJvmSpatialRDD object_rdd_copy.analyze()
def test_empty_constructor(self): spatial_rdd = PointRDD(sparkContext=self.sc, InputLocation=input_location, Offset=offset, splitter=splitter, carryInputData=True, partitions=num_partitions, newLevel=StorageLevel.MEMORY_ONLY) spatial_rdd.buildIndex(IndexType.RTREE, False) spatial_rdd_copy = PointRDD() spatial_rdd_copy.rawJvmSpatialRDD = spatial_rdd.rawJvmSpatialRDD spatial_rdd_copy.analyze()
def test_spatial_join_query(self): point_rdd = PointRDD(self.sc, point_path, 4, FileDataSplitter.WKT, True) polygon_rdd = PolygonRDD(self.sc, counties_path, 2, 3, FileDataSplitter.WKT, True) point_rdd.analyze() point_rdd.spatialPartitioning(GridType.KDBTREE) polygon_rdd.spatialPartitioning(point_rdd.getPartitioner()) result = JoinQuery.SpatialJoinQuery(point_rdd, polygon_rdd, True, False) print(result.count())
def test_equal_partitioning(self): spatial_rdd = PointRDD(sparkContext=self.sc, InputLocation=input_location, Offset=offset, splitter=splitter, carryInputData=False, partitions=10, newLevel=StorageLevel.MEMORY_ONLY) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(GridType.EQUALGRID) for envelope in spatial_rdd.grids: print("PointRDD spatial partitioning grids: " + str(envelope)) assert spatial_rdd.countWithoutDuplicates( ) == spatial_rdd.countWithoutDuplicatesSPRDD()
def test_distance_join_query(self): object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) query_window_rdd = CircleRDD(object_rdd, 0.1) object_rdd.analyze() object_rdd.spatialPartitioning(GridType.QUADTREE) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) for i in range(each_query_loop_times): result_size = JoinQuery.DistanceJoinQuery(object_rdd, query_window_rdd, False, True).count()
def test_r_tree_spatial_partitioning(self): spatial_rdd = PointRDD(sparkContext=self.sc, InputLocation=input_location, Offset=offset, splitter=splitter, carryInputData=True, partitions=10, newLevel=StorageLevel.MEMORY_ONLY) spatial_rdd.analyze() spatial_rdd.spatialPartitioning(GridType.RTREE) for envelope in spatial_rdd.grids: print(envelope) assert spatial_rdd.countWithoutDuplicates( ) == spatial_rdd.countWithoutDuplicatesSPRDD()
def test_spatial_join_query_and_build_index_on_points_on_the_fly(self): query_window = PolygonRDD(self.sc, polygon_rdd_input_location, polygon_rdd_start_offset, polygon_rdd_end_offset, polygon_rdd_splitter, True) object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) object_rdd.analyze() object_rdd.spatialPartitioning(join_query_partitionin_type) query_window.spatialPartitioning(object_rdd.getPartitioner()) for i in range(each_query_loop_times): result_size = JoinQuery.SpatialJoinQuery(object_rdd, query_window, True, False).count()
def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self): query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location, polygon_rdd_start_offset, polygon_rdd_end_offset, polygon_rdd_splitter, True) object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False) object_rdd.analyze() object_rdd.spatialPartitioning(join_query_partitionin_type) query_window_rdd.spatialPartitioning(object_rdd.getPartitioner()) for i in range(each_query_loop_times): join_params = JoinParams(False, polygon_rdd_index_type, JoinBuildSide.LEFT) resultSize = JoinQuery.spatialJoin(query_window_rdd, object_rdd, join_params).count()