def dynamic_rtree_int(self, query_rdd, num_partitions, use_legacy_apis, grid_type, index_type, expected_count):
    """Join ``query_rdd`` against a freshly built point RDD and check the result size.

    :param query_rdd: RDD passed as the first (query-window) argument of the join
    :param num_partitions: partition count handed to ``create_point_rdd``
    :param use_legacy_apis: forwarded unchanged to ``partition_rdds``
    :param grid_type: forwarded unchanged to ``partition_rdds``
    :param index_type: index type placed into the ``JoinParams``
    :param expected_count: number of entries the collected join result must have
    """
    spatial_rdd = self.create_point_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

    join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
    results = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

    self.sanity_check_flat_join_results(results)
    # Use the builtin rather than calling the dunder method directly.
    assert expected_count == len(results)
def test_spatial_join_to_spatial_rdd(self):
    """Join the bank POI geometries with the area polygons and expect 5 collected results.

    Both inputs are read with ``WktReader.readToGeometryRDD`` and analyzed. The
    POI RDD is partitioned with a quadtree grid, and the area RDD reuses the
    POI RDD's partitioner before the raw join is executed.
    """
    poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    poi_point_rdd.analyze()
    areas_polygon_rdd.analyze()

    poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
    areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

    jvm_sedona_rdd = JoinQueryRaw.spatialJoin(
        poi_point_rdd,
        areas_polygon_rdd,
        JoinParams(considerBoundaryIntersection=True),
    )
    # collect() returns the materialized results, not an RDD, so name it accordingly.
    joined_pairs = jvm_sedona_rdd.to_rdd().collect()
    assert len(joined_pairs) == 5
def test_dynamic_index_int(self, num_partitions, use_legacy_apis, grid_type, index_type, intersects):
    """Join the query polygon set against the input polygons and verify the result count.

    :param num_partitions: partition count handed to ``create_polygon_rdd`` for both sides
    :param use_legacy_apis: forwarded unchanged to ``partition_rdds``
    :param grid_type: forwarded to ``partition_rdds``; also selects which expected count applies
    :param index_type: index type placed into the ``JoinParams``
    :param intersects: first ``JoinParams`` argument; also passed to the expected-count helpers
    """
    query_rdd = self.create_polygon_rdd(query_polygon_set, splitter, num_partitions)
    spatial_rdd = self.create_polygon_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

    join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
    result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
    self.sanity_check_flat_join_results(result)

    # The expected size depends on whether this grid type keeps original duplicates.
    if self.expect_to_preserve_original_duplicates(grid_type):
        expected_count = self.get_expected_with_original_duplicates_count(intersects)
    else:
        expected_count = self.get_expected_count(intersects)
    assert expected_count == len(result)
def test_dynamic_index_int(self, num_partitions, grid_type, index_type):
    """Join the rectangle input with itself and verify the result count.

    :param num_partitions: partition count handed to ``create_rectangle_rdd`` for both sides
    :param grid_type: forwarded to ``partition_rdds``; also selects which expected count applies
    :param index_type: index type placed into the ``JoinParams``
    """
    # Both sides are built from the same ``input_location``.
    query_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    spatial_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)
    self.partition_rdds(query_rdd, spatial_rdd, grid_type)

    join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
    result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
    self.sanity_check_flat_join_results(result)

    # The expected size depends on whether this grid type keeps original duplicates.
    if self.expect_to_preserve_original_duplicates(grid_type):
        expected_count = match_with_original_duplicates_count
    else:
        expected_count = match_count
    assert expected_count == len(result)
def spatialJoin(cls, queryWindowRDD: SpatialRDD, objectRDD: SpatialRDD, joinParams: JoinParams) -> RDD:
    """
    Run the JVM-side ``JoinQuery.spatialJoin`` and wrap its result in a Python RDD.

    :param queryWindowRDD: SpatialRDD; its ``_jvm`` and ``_sc`` handles are reused and
        its ``_srdd`` is passed as the first join argument
    :param objectRDD: SpatialRDD; its ``_srdd`` is passed as the second join argument
    :param joinParams: JoinParams, converted with ``jvm_instance`` before the JVM call
    :return: RDD built from the translated pair RDD, ``queryWindowRDD._sc`` and a
        ``SedonaPickler`` instance
    """
    gateway = queryWindowRDD._jvm
    context = queryWindowRDD._sc

    params_on_jvm = joinParams.jvm_instance(gateway)
    joined = gateway.JoinQuery.spatialJoin(
        queryWindowRDD._srdd,
        objectRDD._srdd,
        params_on_jvm,
    )

    # Translate the JVM pair RDD into a form the Python-side RDD can wrap.
    converter = JvmSedonaPythonConverter(gateway)
    python_pairs = converter.translate_spatial_pair_rdd_to_python(joined)
    return RDD(python_pairs, context, SedonaPickler())
def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self):
    """Repeatedly run an indexed spatial join of polygons against points.

    The point RDD is analyzed and spatially partitioned, and the polygon RDD
    reuses its partitioner. The join is then run ``each_query_loop_times`` times
    and counted.

    NOTE(review): the count is not compared with any expected value, so the test
    only verifies that the join runs without raising. Consider asserting a size.
    """
    query_window_rdd = PolygonRDD(
        self.sc,
        polygon_rdd_input_location,
        polygon_rdd_start_offset,
        polygon_rdd_end_offset,
        polygon_rdd_splitter,
        True,
    )
    object_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )
    object_rdd.analyze()
    object_rdd.spatialPartitioning(join_query_partitionin_type)
    query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

    # The parameters are identical on every iteration, so build them once.
    join_params = JoinParams(False, polygon_rdd_index_type, JoinBuildSide.LEFT)
    for _ in range(each_query_loop_times):
        # The result was never read, so the unused binding is dropped.
        JoinQuery.spatialJoin(query_window_rdd, object_rdd, join_params).count()
def test_spatial_join_to_df(self):
    """Convert a raw spatial join result to a DataFrame and check its size and columns.

    The bank POI and area geometries are read with ``WktReader.readToGeometryRDD``
    and analyzed. They are partitioned with a shared quadtree partitioner and
    joined with default ``JoinParams``. The result is converted by ``Adapter.toDf``.
    """
    points = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
    areas = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
    for geometry_rdd in (points, areas):
        geometry_rdd.analyze()

    points.spatialPartitioning(GridType.QUADTREE)
    # Reuse the partitioner of the POI side for the areas side.
    areas.spatialPartitioning(points.getPartitioner())

    raw_join = JoinQueryRaw.spatialJoin(points, areas, JoinParams())
    joined_df = Adapter.toDf(
        raw_join,
        ["area_id", "area_name"],
        ["poi_id", "poi_name"],
        self.spark,
    )

    assert joined_df.count() == 5
    assert joined_df.columns == [
        "leftgeometry",
        "area_id",
        "area_name",
        "rightgeometry",
        "poi_id",
        "poi_name",
    ]