コード例 #1
0
 def test_empty_constructor(self):
     """A PolygonRDD built with no arguments can adopt another RDD's raw JVM RDD."""
     source_rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     source_rdd.analyze()
     source_rdd.spatialPartitioning(grid_type)
     source_rdd.buildIndex(IndexType.RTREE, True)
     # Construct empty, then reuse the JVM-side raw RDD from the populated one.
     clone = PolygonRDD()
     clone.rawJvmSpatialRDD = source_rdd.rawJvmSpatialRDD
     clone.analyze()
コード例 #2
0
    def test_geo_data_convert_linestring_rdd(self):
        """GeoData polygons round-trip through a PolygonRDD built from a Python RDD.

        Builds three polygons (a unit square, a square with a hole, and one
        parsed from WKT), wraps them in GeoData with user data, and checks the
        collected raw RDD contains equal GeoData objects.
        """
        polygon = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])

        # NOTE: originally named `ext`/`int`, which shadowed the builtin
        # `int`; renamed so builtins stay usable in this scope.
        exterior = [(0, 0), (0, 2), (2, 2), (2, 0), (0, 0)]
        interior = [(1, 1), (1.5, 1), (1.5, 1.5), (1, 1.5), (1, 1)]

        polygon2 = Polygon(exterior, [interior])

        wkt = "POLYGON ((-71.1776585052917 42.3902909739571, -71.1776820268866 42.3903701743239, -71.1776063012595 42.3903825660754, -71.1775826583081 42.3903033653531, -71.1776585052917 42.3902909739571))"
        polygon3 = loads(wkt)

        polygons = [
            GeoData(geom=polygon, userData="a"),
            GeoData(geom=polygon2, userData="b"),
            GeoData(geom=polygon3, userData="c"),
        ]

        rdd_data = self.sc.parallelize(polygons)

        polygon_rdd = PolygonRDD(rdd_data)
        collected_data = polygon_rdd.rawSpatialRDD.collect()
        sorted_collected_data = sorted(collected_data,
                                       key=lambda x: x.userData)
        # Element-wise equality against the input after sorting by user data;
        # generator avoids materializing an intermediate list for all().
        assert all(
            geo1 == geo2 for geo1, geo2 in zip(polygons, sorted_collected_data)
        )
コード例 #3
0
    def test_outside_polygon_join_correctness(self):
        """Polygons entirely outside every window must produce an empty join."""
        window_rdd = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set),
            StorageLevel.MEMORY_ONLY)
        object_rdd = PolygonRDD(
            self.sc.parallelize(self.test_outside_polygon_set),
            StorageLevel.MEMORY_ONLY)
        self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE)

        # Indexed join: no object intersects any window, so expect no pairs.
        result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True,
                                            False).collect()
        assert len(result) == 0

        # The unindexed variant must agree.
        result_no_index = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd,
                                                     False, False).collect()
        assert len(result_no_index) == 0
コード例 #4
0
    def test_overlapped_polygon_join_correctness(self):
        """Join result for overlapping polygons is the same with and without an index."""
        windows = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set),
            StorageLevel.MEMORY_ONLY)
        objects = PolygonRDD(
            self.sc.parallelize(self.test_overlapped_polygon_set),
            StorageLevel.MEMORY_ONLY)
        self.prepare_rdd(objects, windows, GridType.QUADTREE)

        # Indexed join.
        indexed_result = JoinQuery.SpatialJoinQuery(objects, windows, True,
                                                    True).collect()
        self.verify_join_result(indexed_result)

        # Unindexed join must pass the same verification.
        plain_result = JoinQuery.SpatialJoinQuery(objects, windows,
                                                  False, True).collect()
        self.verify_join_result(plain_result)
コード例 #5
0
 def test_saving_to_disc_spatial_rdd_polygon(self,
                                             remove_spatial_rdd_disc_dir):
     """Persist a polygon RDD's raw JVM RDD to disk as an object file."""
     from tests.properties.polygon_properties import input_location, splitter, num_partitions
     rdd = PolygonRDD(self.sc, input_location, splitter, True,
                      num_partitions, StorageLevel.MEMORY_ONLY)
     target_path = os.path.join(disc_object_location, "polygon")
     rdd.rawJvmSpatialRDD.saveAsObjectFile(target_path)
コード例 #6
0
    def test_spatial_knn_correctness(self):
        """KNN results with and without an R-tree index must match exactly.

        Both result lists are sorted by distance to the query point; the sum
        of pairwise geometry distances over the top-k entries must be zero.
        """
        polygon_rdd = PolygonRDD(self.sc, input_location, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(polygon_rdd,
                                                   self.query_point,
                                                   self.top_k, False)
        polygon_rdd.buildIndex(IndexType.RTREE, False)
        result_with_index = KNNQuery.SpatialKnnQuery(polygon_rdd,
                                                     self.query_point,
                                                     self.top_k, True)

        def sort_key(geo_data):
            # Order by distance from the query point.
            return distance_sorting_functions(geo_data, self.query_point)

        sorted_result_no_index = sorted(result_no_index, key=sort_key)
        sorted_result_with_index = sorted(result_with_index, key=sort_key)

        # Guard: both queries must return at least top_k neighbours (the
        # original indexed loop would have raised IndexError otherwise).
        assert len(sorted_result_no_index) >= self.top_k
        assert len(sorted_result_with_index) >= self.top_k

        # Pairwise distance between corresponding geometries; zero means the
        # two result sets are geometrically identical.
        difference = sum(
            a.geom.distance(b.geom)
            for a, b in zip(sorted_result_no_index[:self.top_k],
                            sorted_result_with_index[:self.top_k]))

        assert difference == 0
コード例 #7
0
    def test_spatial_join_query_with_polygon_rdd_using_index(self):
        """Indexed point/polygon join carries user data through the result."""
        query_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter,
                               True, num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3005")

        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                               num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3005")

        query_rdd.analyze()
        spatial_rdd.analyze()

        spatial_rdd.spatialPartitioning(grid_type)

        spatial_rdd.buildIndex(IndexType.RTREE, True)

        query_rdd.spatialPartitioning(spatial_rdd.getPartitioner())

        result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False,
                                            True).collect()

        assert result[1][0].getUserData() is not None

        # Every matched geometry must retain its user data. Iterating an
        # empty match list is a no-op, so the original
        # `if data[1].__len__() != 0` guard was redundant and non-idiomatic.
        for data in result:
            for right_data in data[1]:
                assert right_data.getUserData() is not None
コード例 #8
0
 def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
     """Load a previously saved polygon spatial RDD from *path*.

     :param sc: active SparkContext, used to reach the JVM
     :param path: location of the saved object file
     :return: a PolygonRDD wrapping the loaded JVM-side spatial RDD
     """
     loader = SpatialObjectLoaderAdapter(sc._jvm)
     polygon_rdd = PolygonRDD()
     polygon_rdd.set_srdd(loader.load_polygon_spatial_rdd(sc._jsc, path))
     return polygon_rdd
コード例 #9
0
 def test_saving_to_disc_index_polygon(self):
     """Build an R-tree index over a polygon RDD and persist it to disk."""
     from tests.properties.polygon_properties import input_location, splitter, num_partitions
     rdd = PolygonRDD(self.sc, input_location, splitter, True,
                      num_partitions, StorageLevel.MEMORY_ONLY)
     rdd.buildIndex(IndexType.RTREE, False)
     index_path = os.path.join(disc_location, "polygon_index")
     rdd.indexedRawRDD.saveAsObjectFile(index_path)
コード例 #10
0
 def test_spatial_knn_query_using_index(self):
     """Indexed KNN queries return results that carry user data."""
     polygon_rdd = PolygonRDD(self.sc, input_location, splitter, True)
     polygon_rdd.buildIndex(IndexType.RTREE, False)
     # Loop index is unused; run the query loop_times times.
     for _ in range(self.loop_times):
         result = KNNQuery.SpatialKnnQuery(polygon_rdd, self.query_point,
                                           self.top_k, True)
         # The original asserted `result.__len__() > -1`, which is vacuously
         # true. result[0] is dereferenced below, so the result must be
         # non-empty anyway; assert that directly.
         assert len(result) > 0
         assert result[0].getUserData() is not None
コード例 #11
0
    def test_outside_polygon_distance_join_correctness(self):
        """Distance join with windows far from all objects yields no pairs."""
        center_geometry_rdd = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set),
            StorageLevel.MEMORY_ONLY)
        # Windows are 0.1-radius circles around the polygon window set.
        window_rdd = CircleRDD(center_geometry_rdd, 0.1)
        object_rdd = PolygonRDD(
            self.sc.parallelize(self.test_outside_polygon_set),
            StorageLevel.MEMORY_ONLY)
        self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE)

        # Indexed distance join: nothing lies within range of any window.
        result = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True,
                                             True).collect()
        assert len(result) == 0

        # The unindexed variant must agree.
        result_no_index = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd,
                                                      False, True).collect()
        assert len(result_no_index) == 0
コード例 #12
0
 def test_build_index_without_set_grid(self):
     """buildIndex succeeds on the raw RDD even before spatial partitioning."""
     rdd = PolygonRDD(
         self.sc,
         input_location,
         FileDataSplitter.CSV,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     rdd.analyze()
     rdd.buildIndex(IndexType.RTREE, False)
コード例 #13
0
    def test_creating_polygon_rdd(self):
        """A WKT-backed polygon RDD over the counties file has 407 unique rows."""
        counties = PolygonRDD(self.spark._sc, counties_path, 2, 3,
                              FileDataSplitter.WKT, True)
        counties.analyze()

        cnt = counties.countWithoutDuplicates()
        assert cnt == 407, f"Polygon RDD should have 407 but found {cnt}"
コード例 #14
0
    def test_geojson_to_dataframe(self):
        """Converting a GeoJSON polygon RDD to a DataFrame preserves columns."""
        spatial_rdd = PolygonRDD(self.spark.sparkContext,
                                 geojson_input_location,
                                 FileDataSplitter.GEOJSON, True)

        spatial_rdd.analyze()
        # Convert once and reuse; the original called Adapter.toDf twice
        # (once just for .show()), doing the same conversion work two times.
        df = Adapter.toDf(spatial_rdd, self.spark)
        df.show()

        assert df.columns[1] == "STATEFP"
コード例 #15
0
    def test_geojson_to_dataframe(self):
        """GeoJSON RDD to DataFrame with WKT parsed into a geometry column."""
        spatial_rdd = PolygonRDD(self.spark.sparkContext,
                                 geojson_input_location,
                                 FileDataSplitter.GEOJSON, True)
        spatial_rdd.analyze()

        # Parenthesised chain instead of a backslash continuation.
        df = (
            Adapter.toDf(spatial_rdd, self.spark)
            .withColumn("geometry", expr("ST_GeomFromWKT(geometry)"))
        )
        df.show()
        assert df.columns[1] == "STATEFP"
コード例 #16
0
    def test_spatial_range_query(self):
        """Range query over the polygon RDD returns 704 rows carrying user data."""
        spatial_rdd = PolygonRDD(self.sc, input_location, splitter, True,
                                 StorageLevel.MEMORY_ONLY)
        # Repeat the query loop_times times; the loop index is unused, so
        # name it `_`. Backslash continuations replaced with parentheses.
        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, False).count()
            assert result_size == 704

        first_hit = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False).take(10)[0]
        assert first_hit.getUserData() is not None
コード例 #17
0
 def test_wkb_constructor(self):
     """A WKB-backed polygon RDD loads 103 features with tab-separated user data."""
     rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location_wkb,
         splitter=FileDataSplitter.WKB,
         carryInputData=True,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     rdd.analyze()
     assert rdd.approximateTotalCount == 103
     assert rdd.boundaryEnvelope is not None
     first_user_data = rdd.rawSpatialRDD.take(1)[0].getUserData()
     assert first_user_data == "31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"
コード例 #18
0
    def test_voronoi_spatial_partitioning(self):
        """Voronoi partitioning produces a printable set of grid envelopes."""
        rdd = PolygonRDD(sparkContext=self.sc,
                         InputLocation=input_location,
                         splitter=FileDataSplitter.CSV,
                         carryInputData=True,
                         partitions=10,
                         newLevel=StorageLevel.MEMORY_ONLY)
        rdd.analyze()
        rdd.spatialPartitioning(GridType.VORONOI)

        # Dump the resulting partition envelopes for inspection.
        for grid_envelope in rdd.grids:
            print(grid_envelope)
コード例 #19
0
 def test_load_id_column_data_check(self):
     """GeoJSON with an id column yields a single-row DataFrame with 3 or 4 columns."""
     spatial_rdd = PolygonRDD(self.spark.sparkContext,
                              geojson_id_input_location,
                              FileDataSplitter.GEOJSON, True)
     spatial_rdd.analyze()
     df = Adapter.toDf(spatial_rdd, self.spark)
     df.show()
     # The column count varies (3 or 4) between conversions; the original
     # expressed this as try/except AssertionError around two asserts, which
     # is an awkward spelling of a simple membership test.
     assert len(df.columns) in (3, 4)
     assert df.count() == 1
コード例 #20
0
    def test_hilbert_curve_spatial_partitioning(self):
        """Hilbert-curve partitioning produces a printable set of grid envelopes."""
        rdd = PolygonRDD(sparkContext=self.sc,
                         InputLocation=input_location,
                         splitter=splitter,
                         carryInputData=True,
                         partitions=10,
                         newLevel=StorageLevel.MEMORY_ONLY)
        rdd.analyze()
        rdd.spatialPartitioning(GridType.HILBERT)

        # Dump the resulting partition envelopes for inspection.
        for grid_envelope in rdd.grids:
            print(grid_envelope)
コード例 #21
0
    def readToPolygonRDD(cls, sc: SparkContext, inputPath: str) -> PolygonRDD:
        """Read shapefile data at *inputPath* into a PolygonRDD.

        :param sc: active SparkContext, used to reach the JVM
        :param inputPath: path of the shapefile data to read
        :return: a PolygonRDD wrapping the JVM-side spatial RDD
        """
        jvm = sc._jvm
        jsc = sc._jsc
        # Delegate the actual parsing to the JVM ShapefileReader, then wrap
        # the returned JVM RDD in a fresh Python-side PolygonRDD.
        srdd = jvm.ShapefileReader.readToPolygonRDD(jsc, inputPath)
        spatial_rdd = PolygonRDD()
        spatial_rdd.set_srdd(srdd)
        return spatial_rdd
コード例 #22
0
    def test_spatial_join_query(self):
        """Point-in-polygon join over county polygons runs and yields a count."""
        points = PointRDD(self.sc, point_path, 4, FileDataSplitter.WKT,
                          True)
        counties = PolygonRDD(self.sc, counties_path, 2, 3,
                              FileDataSplitter.WKT, True)

        points.analyze()
        points.spatialPartitioning(GridType.KDBTREE)
        counties.spatialPartitioning(points.getPartitioner())

        join_result = JoinQuery.SpatialJoinQuery(points, counties, True,
                                                 False)
        print(join_result.count())
コード例 #23
0
    def test_polygon_distance_join_with_crs_transformation(self):
        """Distance join after CRS transformation returns 5467 covered pairs.

        Both sides are reprojected epsg:4326 -> epsg:3857; every matched
        polygon must be covered by the 0.1-radius circle around its window
        geometry.
        """
        query_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter,
                               True, num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3857")
        window_rdd = CircleRDD(query_rdd, 0.1)

        object_rdd = PolygonRDD(self.sc, input_location_query_polygon,
                                splitter, True, num_partitions,
                                StorageLevel.MEMORY_ONLY, "epsg:4326",
                                "epsg:3857")

        # NOTE(review): repartition() returns a new RDD and its return value
        # is discarded here, so this call has no effect on object_rdd — kept
        # for parity with the original test; confirm intent upstream.
        object_rdd.rawJvmSpatialRDD.jsrdd.repartition(4)
        object_rdd.spatialPartitioning(GridType.KDBTREE)
        object_rdd.buildIndex(IndexType.RTREE, True)
        window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        results = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True,
                                              False).collect()
        assert len(results) == 5467

        for data in results:
            for polygon_data in data[1]:
                assert Circle(data[0].geom, 0.1).covers(polygon_data.geom)
コード例 #24
0
    def test_mbr(self):
        """MinimumBoundingRectangle yields a rectangle RDD we can collect and print."""
        polygon_rdd = PolygonRDD(sparkContext=self.sc,
                                 InputLocation=input_location,
                                 splitter=FileDataSplitter.CSV,
                                 carryInputData=True,
                                 partitions=num_partitions)

        rectangle_rdd = polygon_rdd.MinimumBoundingRectangle()

        result = rectangle_rdd.rawSpatialRDD.collect()

        for el in result:
            print(el.geom.wkt)
        print(result)
        # Smoke check only: the original asserted `result.__len__() > -1`,
        # which can never fail. Kept equivalent but idiomatic; strengthen to
        # `> 0` once the fixture's row count is pinned down.
        assert len(result) >= 0
コード例 #25
0
 def test_geojson_constructor(self):
     """A GeoJSON-backed polygon RDD exposes count, envelope, user data and field names."""
     rdd = PolygonRDD(sparkContext=self.sc,
                      InputLocation=input_location_geo_json,
                      splitter=FileDataSplitter.GEOJSON,
                      carryInputData=True,
                      partitions=4,
                      newLevel=StorageLevel.MEMORY_ONLY)
     rdd.analyze()
     assert rdd.approximateTotalCount == 1001
     assert rdd.boundaryEnvelope is not None
     # Carried input data is the tab-joined attribute row for each feature.
     assert rdd.rawSpatialRDD.take(1)[0].getUserData() == "01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636"
     assert rdd.rawSpatialRDD.take(2)[1].getUserData() == "01\t045\t021102\t4\t1500000US010450211024\t010450211024\t4\tBG\t11360854\t0"
     assert rdd.fieldNames == ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "AFFGEOID", "GEOID", "NAME", "LSAD", "ALAND", "AWATER"]
コード例 #26
0
    def test_spatial_join_query(self):
        """Repeatedly run a point/polygon join over cached partitioned RDDs."""
        windows = PolygonRDD(
            self.sc, polygon_rdd_input_location, polygon_rdd_start_offset,
            polygon_rdd_end_offset, polygon_rdd_splitter, True)
        points = PointRDD(
            self.sc, point_rdd_input_location, point_rdd_offset,
            point_rdd_splitter, True, StorageLevel.MEMORY_ONLY)

        points.spatialPartitioning(join_query_partitioning_type)
        windows.spatialPartitioning(points.getPartitioner())

        # Cache both partitioned RDDs so each loop iteration reuses them.
        points.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)
        windows.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            result_size = JoinQuery.SpatialJoinQuery(points, windows,
                                                     False, True).count()
コード例 #27
0
    def test_inside_point_join_correctness(self):
        """Points strictly inside the windows join identically with or without an index."""
        self.once_before_all()

        windows = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set))
        points = PointRDD(self.sc.parallelize(self.test_inside_point_set))
        self.prepare_rdd(points, windows, GridType.QUADTREE)

        # Indexed join.
        indexed_result = JoinQuery.SpatialJoinQuery(points, windows, True,
                                                    False).collect()
        self.verify_join_result(indexed_result)

        # Unindexed join must pass the same verification.
        plain_result = JoinQuery.SpatialJoinQuery(points, windows,
                                                  False, False).collect()
        self.verify_join_result(plain_result)
コード例 #28
0
    def test_to_df_srdd_fn_spark(self):
        """Adapter.toDf with explicit column names yields geometry plus those columns."""
        spatial_rdd = PolygonRDD(self.spark.sparkContext,
                                 geojson_input_location,
                                 FileDataSplitter.GEOJSON, True)
        spatial_rdd.analyze()
        assert spatial_rdd.approximateTotalCount == 1001

        spatial_columns = [
            "state_id", "county_id", "tract_id", "bg_id", "fips", "fips_short",
            "bg_nr", "type", "code1", "code2"
        ]
        spatial_df = Adapter.toDf(spatial_rdd, spatial_columns, self.spark)
        spatial_df.show()

        # The first column is always the geometry, followed by the requested names.
        expected_columns = ["geometry"] + spatial_columns
        assert spatial_df.columns == expected_columns
        assert spatial_df.count() == 1001
コード例 #29
0
File: test_rdd.py  Project: paulmvp/incubator-sedona
    def test_spatial_join_query_and_build_index_on_points_on_the_fly(self):
        """Join a polygon window RDD against points, building the point index per query."""
        query_window = PolygonRDD(self.sc, polygon_rdd_input_location,
                                  polygon_rdd_start_offset,
                                  polygon_rdd_end_offset, polygon_rdd_splitter,
                                  True)
        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd.analyze()
        # NOTE(review): `join_query_partitionin_type` looks like a typo of
        # `join_query_partitioning_type` used by a sibling test — confirm the
        # properties module actually defines this exact name.
        object_rdd.spatialPartitioning(join_query_partitionin_type)
        query_window.spatialPartitioning(object_rdd.getPartitioner())

        # Repeat the join; passing useIndex=True rebuilds the index each run.
        for i in range(each_query_loop_times):
            result_size = JoinQuery.SpatialJoinQuery(object_rdd, query_window,
                                                     True, False).count()
コード例 #30
0
    def test_polygon_rdd(self):
        """The first three polygons read from the input match the expected WKT."""
        polygon_rdd = PolygonRDD(sparkContext=self.sc,
                                 InputLocation=polygon_rdd_input_location,
                                 startOffset=polygon_rdd_start_offset,
                                 endOffset=polygon_rdd_end_offset,
                                 splitter=polygon_rdd_splitter,
                                 carryInputData=True)

        collected = polygon_rdd.getRawSpatialRDD().collect()

        expected_wkt = [
            "POLYGON ((-74.020753 40.836454, -74.020753 40.843768, -74.018162 40.843768, -74.018162 40.836454, -74.020753 40.836454))",
            "POLYGON ((-74.018978 40.837712, -74.018978 40.852181, -74.014938 40.852181, -74.014938 40.837712, -74.018978 40.837712))",
            "POLYGON ((-74.021683 40.833253, -74.021683 40.834288, -74.021368 40.834288, -74.021368 40.833253, -74.021683 40.833253))"
        ]

        actual_wkt = [geo_data.geom.wkt for geo_data in collected][:3]
        assert actual_wkt == expected_wkt