def test_distance_join_query_flat_to_df(self):
        """Check that the flat distance join yields 10 rows once converted to an RDD.

        Despite the ``_to_df`` suffix in its name, this test exercises the
        ``to_rdd()`` conversion of the object returned by
        ``JoinQueryRaw.DistanceJoinQueryFlat``.
        """
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        # NOTE(review): 2.0 is presumably the circle radius -- confirm against CircleRDD.
        circle_rdd = CircleRDD(poi_point_rdd, 2.0)

        circle_rdd.analyze()
        poi_point_rdd.analyze()

        # The circles reuse the partitioner built for the points so that both
        # RDDs share the same partitioning before the join.
        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(poi_point_rdd, circle_rdd, False, True)

        # Use the len() builtin rather than invoking the __len__ dunder directly.
        assert len(jvm_sedona_rdd.to_rdd().collect()) == 10
Beispiel #2
0
    def DistanceJoinQueryFlat(cls, spatialRDD: SpatialRDD,
                              queryRDD: SpatialRDD, useIndex: bool,
                              considerBoundaryIntersection: bool) -> RDD:
        """
        Run a flat distance join and return the result converted with ``to_rdd()``.

        All four arguments are forwarded unchanged, in the same order, to
        ``JoinQueryRaw.DistanceJoinQueryFlat``; the object it returns is then
        converted by calling its ``to_rdd()`` method.

        :param spatialRDD: SpatialRDD, first argument passed to the raw join
        :param queryRDD: SpatialRDD, second argument passed to the raw join
        :param useIndex: bool, forwarded as-is to the raw join
        :param considerBoundaryIntersection: bool, forwarded as-is to the raw join
        :return: the result of ``to_rdd()`` on the raw join output

        Usage sketch (``spatial_rdd`` and ``query_rdd`` are prepared by the caller):

        >> spatial_join_result = JoinQuery.DistanceJoinQueryFlat(spatial_rdd, query_rdd, True, True)
        >> spatial_join_result.collect()
        [GeoData(), GeoData()]
        """
        return JoinQueryRaw.DistanceJoinQueryFlat(
            spatialRDD,
            queryRDD,
            useIndex,
            considerBoundaryIntersection,
        ).to_rdd()
    def test_distance_join_query_flat_to_df(self):
        """Check the DataFrame built from a flat distance join: row count and column names."""
        points = WktReader.readToGeometryRDD(
            self.sc, bank_csv_path, 1, False, False)
        # NOTE(review): 2.0 is presumably the circle radius -- confirm against CircleRDD.
        circles = CircleRDD(points, 2.0)

        circles.analyze()
        points.analyze()

        # The circles reuse the partitioner built for the points.
        points.spatialPartitioning(GridType.QUADTREE)
        circles.spatialPartitioning(points.getPartitioner())

        joined_pairs = JoinQueryRaw.DistanceJoinQueryFlat(
            points, circles, False, True)

        # Field names supplied for the left and right side of each joined pair.
        left_fields = ["poi_from_id", "poi_from_name"]
        right_fields = ["poi_to_id", "poi_to_name"]
        result_df = Adapter.toDf(joined_pairs, left_fields, right_fields,
                                 self.spark)

        expected_columns = [
            "leftgeometry",
            "poi_from_id",
            "poi_from_name",
            "rightgeometry",
            "poi_to_id",
            "poi_to_name",
        ]
        assert result_df.count() == 10
        assert result_df.columns == expected_columns