def test_overlapped_linestring_join_correctness(self):
    """Join the overlapped linestrings with the polygon windows and verify.

    The join runs twice, first with the third ``SpatialJoinQuery`` argument
    set to ``True`` and then to ``False``; each collected result is handed
    to ``self.verify_join_result``.
    """
    # NOTE(review): sibling correctness tests call self.once_before_all()
    # before touching the fixture sets; this one does not -- confirm that
    # is intentional.
    windows = PolygonRDD(
        self.sc.parallelize(self.test_polygon_window_set),
        StorageLevel.MEMORY_ONLY,
    )
    linestrings = LineStringRDD(
        self.sc.parallelize(self.test_overlapped_linestring_set),
        StorageLevel.MEMORY_ONLY,
    )
    self.prepare_rdd(linestrings, windows, GridType.QUADTREE)

    # Indexed join first, then the non-indexed one, as two separate queries.
    for use_index in (True, False):
        joined = JoinQuery.SpatialJoinQuery(
            linestrings, windows, use_index, True
        ).collect()
        self.verify_join_result(joined)
def test_outside_point_join_correctness(self):
    """Check that the outside-point set joins to nothing.

    After ``self.once_before_all()`` has run, the outside-point set is
    joined against the polygon windows, once with the third
    ``SpatialJoinQuery`` argument ``True`` and once with ``False``. Both
    collected results must be empty.
    """
    self.once_before_all()
    window_rdd = PolygonRDD(
        self.sc.parallelize(self.test_polygon_window_set),
        StorageLevel.MEMORY_ONLY,
    )
    object_rdd = PointRDD(
        self.sc.parallelize(self.test_outside_point_set),
        StorageLevel.MEMORY_ONLY,
    )
    self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE)

    result = JoinQuery.SpatialJoinQuery(
        object_rdd, window_rdd, True, False
    ).collect()
    # len() rather than a direct __len__() call: same value, idiomatic form.
    assert len(result) == 0

    result_no_index = JoinQuery.SpatialJoinQuery(
        object_rdd, window_rdd, False, False
    ).collect()
    assert len(result_no_index) == 0
def test_inside_point_join_correctness(self):
    """Join the inside-point set with the polygon windows and verify.

    Fixtures are prepared through ``self.once_before_all()``. The join is
    executed with the third ``SpatialJoinQuery`` argument set to ``True``
    and then ``False``; every collected result goes through
    ``self.verify_join_result``.
    """
    self.once_before_all()

    window_source = self.sc.parallelize(self.test_polygon_window_set)
    windows = PolygonRDD(window_source)
    point_source = self.sc.parallelize(self.test_inside_point_set)
    points = PointRDD(point_source)
    self.prepare_rdd(points, windows, GridType.QUADTREE)

    for use_index in (True, False):
        joined = JoinQuery.SpatialJoinQuery(
            points, windows, use_index, False
        ).collect()
        self.verify_join_result(joined)
def test_nested_loop(self, num_partitions, use_legacy_apis, grid_type, intersects):
    """Run a non-indexed polygon/polygon join and check the match count.

    Args:
        num_partitions: Forwarded to ``self.create_polygon_rdd``.
        use_legacy_apis: Forwarded to ``self.partition_rdds``.
        grid_type: Forwarded to ``self.partition_rdds``.
        intersects: Passed as the fourth ``SpatialJoinQuery`` argument and
            to ``self.get_expected_count`` to pick the expected total.
    """
    windows = self.create_polygon_rdd(query_polygon_set, splitter, num_partitions)
    polygons = self.create_polygon_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(windows, polygons, grid_type, use_legacy_apis)

    joined = JoinQuery.SpatialJoinQuery(polygons, windows, False, intersects)
    pairs = joined.collect()
    self.sanity_check_join_results(pairs)

    expected = self.get_expected_count(intersects)
    actual = self.count_join_results(pairs)
    assert expected == actual
def nested_loop(self, query_rdd, num_partitions, grid_type, use_legacy_apis, expected_count):
    """Join a point RDD against ``query_rdd`` without an index and count matches.

    Args:
        query_rdd: The query-side RDD supplied by the caller.
        num_partitions: Forwarded to ``self.create_point_rdd``.
        grid_type: Forwarded to ``self.partition_rdds``.
        use_legacy_apis: Forwarded to ``self.partition_rdds``.
        expected_count: Value that ``self.count_join_results`` must return
            for the collected join result.
    """
    points = self.create_point_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, points, grid_type, use_legacy_apis)

    joined = JoinQuery.SpatialJoinQuery(points, query_rdd, False, True)
    pairs = joined.collect()
    self.sanity_check_join_results(pairs)

    actual = self.count_join_results(pairs)
    assert expected_count == actual
def test_spatial_join_query(self):
    """Join points with county polygons and print the result count.

    The point RDD is analyzed and partitioned with ``GridType.KDBTREE``;
    the polygon RDD reuses the point RDD's partitioner. The test only
    prints the count -- it makes no assertion about its value.
    """
    points = PointRDD(self.sc, point_path, 4, FileDataSplitter.WKT, True)
    counties = PolygonRDD(self.sc, counties_path, 2, 3, FileDataSplitter.WKT, True)

    points.analyze()
    points.spatialPartitioning(GridType.KDBTREE)
    # Both sides must share the same partitioner before they are joined.
    shared_partitioner = points.getPartitioner()
    counties.spatialPartitioning(shared_partitioner)

    joined = JoinQuery.SpatialJoinQuery(points, counties, True, False)
    total = joined.count()
    print(total)
def test_index_int(self, num_partitions, use_legacy_apis, grid_type, index_type):
    """Build an index on the rectangle RDD, join it with itself, count matches.

    Args:
        num_partitions: Forwarded to ``self.create_rectangle_rdd``.
        use_legacy_apis: Forwarded to ``self.partition_rdds``.
        grid_type: Forwarded to ``self.partition_rdds``.
        index_type: Index kind handed to ``buildIndex``.
    """
    # Both sides are loaded from the same input location.
    windows = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    rectangles = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(windows, rectangles, grid_type, use_legacy_apis)
    rectangles.buildIndex(index_type, True)

    # NOTE(review): an index was just built, yet the third argument
    # (useIndex) is False -- confirm whether the indexed path was meant
    # to be exercised here.
    joined = JoinQuery.SpatialJoinQuery(rectangles, windows, False, True)
    pairs = joined.collect()
    self.sanity_check_join_results(pairs)

    actual = self.count_join_results(pairs)
    assert match_count == actual
def test_nested_loop(self, num_partitions, use_legacy_apis, grid_type):
    """Join the rectangle RDD with itself without an index and count matches.

    Args:
        num_partitions: Forwarded to ``self.create_rectangle_rdd``.
        use_legacy_apis: Forwarded to ``self.partition_rdds``.
        grid_type: Forwarded to ``self.partition_rdds``.
    """
    query_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    spatial_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

    result = JoinQuery.SpatialJoinQuery(
        spatial_rdd, query_rdd, False, True
    ).collect()

    # The former hand-rolled ``count`` accumulator was never read; the
    # assertion below relies on self.count_join_results instead.
    self.sanity_check_join_results(result)
    assert match_count == self.count_join_results(result)
def test_spatial_join_query_and_build_index_on_points_on_the_fly(self):
    """Repeatedly run a point/polygon join with the useIndex argument set.

    The point RDD is analyzed and partitioned, the polygon RDD reuses its
    partitioner, and the join is counted ``each_query_loop_times`` times.
    The counts themselves are not asserted on.
    """
    polygons = PolygonRDD(
        self.sc,
        polygon_rdd_input_location,
        polygon_rdd_start_offset,
        polygon_rdd_end_offset,
        polygon_rdd_splitter,
        True,
    )
    points = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )

    points.analyze()
    points.spatialPartitioning(join_query_partitionin_type)
    shared_partitioner = points.getPartitioner()
    polygons.spatialPartitioning(shared_partitioner)

    for _ in range(each_query_loop_times):
        joined = JoinQuery.SpatialJoinQuery(points, polygons, True, False)
        # The count is discarded; presumably the call is only here to
        # force the join to execute -- TODO confirm.
        joined.count()
def test_spatial_join_query_with_polygon_rdd(self):
    """Join points with query polygons and check user data is preserved.

    Both RDDs are created with the CRS strings ``"epsg:4326"`` and
    ``"epsg:3005"``. After a non-indexed join, the test asserts that
    ``getUserData()`` is not ``None`` for the first item of the second
    result entry and for every item in the second slot of each entry.
    """
    query_rdd = PolygonRDD(
        self.sc,
        input_location_query_polygon,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )
    spatial_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )
    spatial_rdd.spatialPartitioning(grid_type)
    query_rdd.spatialPartitioning(spatial_rdd.grids)

    result = JoinQuery.SpatialJoinQuery(
        spatial_rdd, query_rdd, False, True
    ).collect()

    assert result[1][0].getUserData() is not None
    for data in result:
        # Iterating an empty match list is already a no-op, so no explicit
        # emptiness check is needed before the inner loop.
        for right_data in data[1]:
            assert right_data.getUserData() is not None