Ejemplo n.º 1
0
    def test_spatial_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(
            self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
            poi_point_rdd, areas_polygon_rdd, False, True)

        pois_within_areas_with_default_column_names = Adapter.toDf(
            jvm_sedona_rdd, self.spark)

        assert pois_within_areas_with_default_column_names.count() == 5

        pois_within_areas_with_passed_column_names = Adapter.toDf(
            jvm_sedona_rdd, ["area_id", "area_name"], ["poi_id", "poi_name"],
            self.spark)

        assert pois_within_areas_with_passed_column_names.count() == 5

        assert pois_within_areas_with_passed_column_names.columns == [
            "leftgeometry", "area_id", "area_name", "rightgeometry", "poi_id",
            "poi_name"
        ]

        assert pois_within_areas_with_default_column_names.schema == StructType(
            [
                StructField("leftgeometry", GeometryType()),
                StructField("rightgeometry", GeometryType()),
            ])

        left_geometries_raw = pois_within_areas_with_default_column_names. \
            selectExpr("ST_AsText(leftgeometry)"). \
            collect()

        left_geometries = self.__row_to_list(left_geometries_raw)

        right_geometries_raw = pois_within_areas_with_default_column_names. \
            selectExpr("ST_AsText(rightgeometry)"). \
            collect()

        right_geometries = self.__row_to_list(right_geometries_raw)

        # Ignore the ordering of these
        assert set(geom[0] for geom in left_geometries) == set([
            'POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))',
            'POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))',
            'POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))',
            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'
        ])
        assert set(geom[0] for geom in right_geometries) == set([
            'POINT (-3 5)', 'POINT (11 5)', 'POINT (4 3)', 'POINT (-1 -1)',
            'POINT (-4 -5)'
        ])
    def test_spatial_join_to_spatial_rdd(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
        sedona_rdd = jvm_sedona_rdd.to_rdd().collect()
        assert sedona_rdd.__len__() == 5
    def test_spatial_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
            poi_point_rdd, areas_polygon_rdd, False, True)

        assert jvm_sedona_rdd.to_rdd().collect().__len__() == 5
    def test_range_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)

        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        poi_point_rdd.buildIndex(IndexType.QUADTREE, False)

        result = RangeQueryRaw.SpatialRangeQuery(
            poi_point_rdd, loads("POLYGON((0 0, 0 20, 20 20, 20 0, 0 0))"),
            True, True)

        rdd = result.to_rdd()

        assert rdd.collect().__len__() == 4

        df_without_column_names = Adapter.toDf(result, self.spark)

        raw_geometries = self.__row_to_list(df_without_column_names.collect())

        assert [point[0].wkt for point in raw_geometries] == [
            'POINT (9 8)', 'POINT (4 3)', 'POINT (12 1)', 'POINT (11 5)'
        ]
        assert df_without_column_names.count() == 4
        assert df_without_column_names.schema == StructType(
            [StructField("geometry", GeometryType())])

        df = Adapter.toDf(result, self.spark, ["poi_id", "poi_name"])

        assert df.count() == 4
        assert df.columns == ["geometry", "poi_id", "poi_name"]
    def test_distance_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
        circle_rdd = CircleRDD(poi_point_rdd, 2.0)

        circle_rdd.analyze()
        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(poi_point_rdd, circle_rdd, False, True)

        assert jvm_sedona_rdd.to_rdd().collect().__len__() == 10
    def test_spatial_join_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        areas_polygon_rdd = WktReader.readToGeometryRDD(
            self.sc, areas_csv_path, 1, False, False)
        poi_point_rdd.analyze()
        areas_polygon_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd,
                                                  areas_polygon_rdd,
                                                  JoinParams())
        sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"],
                                 ["poi_id", "poi_name"], self.spark)

        assert sedona_df.count() == 5
        assert sedona_df.columns == [
            "leftgeometry", "area_id", "area_name", "rightgeometry", "poi_id",
            "poi_name"
        ]
    def test_range_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)

        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        poi_point_rdd.buildIndex(IndexType.QUADTREE, False)

        result = RangeQueryRaw.SpatialRangeQuery(
            poi_point_rdd, loads("POLYGON((0 0, 0 20, 20 20, 20 0, 0 0))"), True, True
        )

        rdd = result.to_rdd()

        assert rdd.collect().__len__() == 4
    def test_distance_join_query_flat_to_df(self):
        poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1,
                                                    False, False)
        circle_rdd = CircleRDD(poi_point_rdd, 2.0)

        circle_rdd.analyze()
        poi_point_rdd.analyze()

        poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
        circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())

        jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(
            poi_point_rdd, circle_rdd, False, True)
        df_sedona_rdd = Adapter.toDf(jvm_sedona_rdd,
                                     ["poi_from_id", "poi_from_name"],
                                     ["poi_to_id", "poi_to_name"], self.spark)

        assert df_sedona_rdd.count() == 10
        assert df_sedona_rdd.columns == [
            "leftgeometry", "poi_from_id", "poi_from_name", "rightgeometry",
            "poi_to_id", "poi_to_name"
        ]
Ejemplo n.º 9
0
 def test_read_to_geometry_rdd(self):
     wkt_geometries = os.path.join(tests_resource, "county_small.tsv")
     wkt_rdd = WktReader.readToGeometryRDD(self.sc, wkt_geometries, 0, True,
                                           False)
     assert wkt_rdd.rawSpatialRDD.count() == 103
     print(wkt_rdd.rawSpatialRDD.collect())