def dynamic_rtree_int(self, query_rdd, num_partitions, use_legacy_apis, grid_type, index_type, expected_count):
    """Join ``query_rdd`` with a point RDD and assert the number of results.

    The point RDD is created with ``self.create_point_rdd`` from the
    module-level ``input_location`` and ``splitter``. Both RDDs are then
    handed to ``self.partition_rdds`` before the join is executed.

    :param query_rdd: RDD passed as the first operand of the spatial join.
    :param num_partitions: forwarded to ``self.create_point_rdd``.
    :param use_legacy_apis: forwarded to ``self.partition_rdds``.
    :param grid_type: forwarded to ``self.partition_rdds``.
    :param index_type: index type given to ``JoinParams``.
    :param expected_count: number of collected join results the test expects.
    :raises AssertionError: if the collected result count differs from
        ``expected_count`` (``self.sanity_check_flat_join_results`` may also
        fail first; its checks are not visible here).
    """
    spatial_rdd = self.create_point_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

    # The first JoinParams argument is fixed to True for this helper.
    join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
    results = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

    self.sanity_check_flat_join_results(results)

    # Use the built-in len() rather than invoking the dunder method directly.
    assert expected_count == len(results)
def test_dynamic_index_int(self, num_partitions, use_legacy_apis, grid_type, index_type, intersects):
    """Join two polygon RDDs and assert the number of collected results.

    The query RDD is built from the module-level ``query_polygon_set`` and
    the spatial RDD from ``input_location``, both via
    ``self.create_polygon_rdd``. The expected count comes from one of two
    helpers, chosen by ``self.expect_to_preserve_original_duplicates``.

    :param num_partitions: forwarded to ``self.create_polygon_rdd`` for both RDDs.
    :param use_legacy_apis: forwarded to ``self.partition_rdds``.
    :param grid_type: forwarded to ``self.partition_rdds`` and used to pick
        which expected-count helper applies.
    :param index_type: index type given to ``JoinParams``.
    :param intersects: passed as the first ``JoinParams`` argument and to the
        expected-count helpers.
    :raises AssertionError: if the collected result count differs from the
        expected count.
    """
    query_rdd = self.create_polygon_rdd(query_polygon_set, splitter, num_partitions)
    spatial_rdd = self.create_polygon_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

    join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
    result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

    self.sanity_check_flat_join_results(result)

    # Same evaluation order as a conditional expression: the predicate runs
    # first, then only the selected helper is called.
    if self.expect_to_preserve_original_duplicates(grid_type):
        expected_count = self.get_expected_with_original_duplicates_count(intersects)
    else:
        expected_count = self.get_expected_count(intersects)

    # Use the built-in len() rather than invoking the dunder method directly.
    assert expected_count == len(result)
def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self):
    """Run a polygon/point spatial join repeatedly and count the results.

    A ``PolygonRDD`` and a ``PointRDD`` are built from module-level settings.
    The point RDD is analyzed and spatially partitioned, and the polygon RDD
    is partitioned with the point RDD's partitioner. The join is then
    executed ``each_query_loop_times`` times. The count is computed but not
    asserted on, so the test only fails if one of the calls raises.
    """
    windows = PolygonRDD(
        self.sc,
        polygon_rdd_input_location,
        polygon_rdd_start_offset,
        polygon_rdd_end_offset,
        polygon_rdd_splitter,
        True,
    )
    points = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )

    points.analyze()
    points.spatialPartitioning(join_query_partitionin_type)
    # Reuse the point RDD's partitioner for the query windows.
    windows.spatialPartitioning(points.getPartitioner())

    for _ in range(each_query_loop_times):
        params = JoinParams(False, polygon_rdd_index_type, JoinBuildSide.LEFT)
        joined = JoinQuery.spatialJoin(windows, points, params)
        # The count is evaluated but its value is intentionally unused.
        joined.count()