Exemple #1
0
    def dynamic_rtree_int(self, query_rdd, num_partitions, use_legacy_apis, grid_type, index_type, expected_count):
        spatial_rdd = self.create_point_rdd(input_location, splitter, num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)
        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
        results = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

        self.sanity_check_flat_join_results(results)

        assert expected_count == results.__len__()
    def test_spatial_join_to_spatial_rdd(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
        sedona_rdd = jvm_sedona_rdd.to_rdd().collect()
        assert sedona_rdd.__len__() == 5
Exemple #3
0
    def test_dynamic_index_int(self, num_partitions, use_legacy_apis, grid_type, index_type, intersects):
        query_rdd = self.create_polygon_rdd(query_polygon_set, splitter, num_partitions)
        spatial_rdd = self.create_polygon_rdd(input_location, splitter, num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

        join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
        result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

        self.sanity_check_flat_join_results(result)

        expected_count = self.get_expected_with_original_duplicates_count(intersects) \
            if self.expect_to_preserve_original_duplicates(grid_type) else self.get_expected_count(intersects)
        assert expected_count == result.__len__()
    def test_dynamic_index_int(self, num_partitions, grid_type, index_type):
        query_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)
        spatial_rdd = self.create_rectangle_rdd(input_location, splitter, num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type)

        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
        result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()

        self.sanity_check_flat_join_results(result)

        expected_count = match_with_original_duplicates_count \
            if self.expect_to_preserve_original_duplicates(grid_type) else match_count

        assert expected_count == result.__len__()
Exemple #5
0
    def spatialJoin(cls, queryWindowRDD: SpatialRDD, objectRDD: SpatialRDD, joinParams: JoinParams) -> RDD:
        """

        :param queryWindowRDD: SpatialRDD
        :param objectRDD: SpatialRDD
        :param joinParams: JoinParams
        :return:
        """

        jvm = queryWindowRDD._jvm
        sc = queryWindowRDD._sc

        jvm_join_params = joinParams.jvm_instance(jvm)

        srdd = jvm.JoinQuery.spatialJoin(queryWindowRDD._srdd, objectRDD._srdd, jvm_join_params)
        serialized = JvmSedonaPythonConverter(jvm).\
            translate_spatial_pair_rdd_to_python(srdd)

        return RDD(serialized, sc, SedonaPickler())
Exemple #6
0
    def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self):
        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True)

        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(join_query_partitionin_type)
        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        for i in range(each_query_loop_times):
            join_params = JoinParams(False, polygon_rdd_index_type,
                                     JoinBuildSide.LEFT)
            resultSize = JoinQuery.spatialJoin(query_window_rdd, object_rdd,
                                               join_params).count()
    def test_spatial_join_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(
            self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd,
                                                  areas_polygon_rdd,
                                                  JoinParams())
        sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"],
                                 ["poi_id", "poi_name"], self.spark)

        assert sedona_df.count() == 5
        assert sedona_df.columns == [
            "leftgeometry", "area_id", "area_name", "rightgeometry", "poi_id",
            "poi_name"
        ]