def test_spatial_join_query_with_polygon_rdd_using_index(self):
        query_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter,
                               True, num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3005")

        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                               num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3005")

        query_rdd.analyze()
        spatial_rdd.analyze()

        spatial_rdd.spatialPartitioning(grid_type)

        spatial_rdd.buildIndex(IndexType.RTREE, True)

        query_rdd.spatialPartitioning(spatial_rdd.getPartitioner())

        result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False,
                                            True).collect()

        assert result[1][0].getUserData() is not None

        for data in result:
            if data[1].__len__() != 0:
                for right_data in data[1]:
                    assert right_data.getUserData() is not None
예제 #2
0
    def test_voronoi_spatial_partitioning(self):
        spatial_rdd = PolygonRDD(sparkContext=self.sc,
                                 InputLocation=input_location,
                                 splitter=FileDataSplitter.CSV,
                                 carryInputData=True,
                                 partitions=10,
                                 newLevel=StorageLevel.MEMORY_ONLY)
        spatial_rdd.analyze()
        spatial_rdd.spatialPartitioning(GridType.VORONOI)

        for envelope in spatial_rdd.grids:
            print(envelope)
예제 #3
0
    def test_hilbert_curve_spatial_partitioning(self):
        spatial_rdd = PolygonRDD(sparkContext=self.sc,
                                 InputLocation=input_location,
                                 splitter=splitter,
                                 carryInputData=True,
                                 partitions=10,
                                 newLevel=StorageLevel.MEMORY_ONLY)
        spatial_rdd.analyze()
        spatial_rdd.spatialPartitioning(GridType.HILBERT)

        for envelope in spatial_rdd.grids:
            print(envelope)
예제 #4
0
 def test_empty_constructor(self):
     spatial_rdd = PolygonRDD(sparkContext=self.sc,
                              InputLocation=input_location,
                              splitter=splitter,
                              carryInputData=True,
                              partitions=num_partitions,
                              newLevel=StorageLevel.MEMORY_ONLY)
     spatial_rdd.analyze()
     spatial_rdd.spatialPartitioning(grid_type)
     spatial_rdd.buildIndex(IndexType.RTREE, True)
     spatial_rdd_copy = PolygonRDD()
     spatial_rdd_copy.rawJvmSpatialRDD = spatial_rdd.rawJvmSpatialRDD
     spatial_rdd_copy.analyze()
예제 #5
0
    def test_spatial_join_query(self):
        point_rdd = PointRDD(self.sc, point_path, 4, FileDataSplitter.WKT,
                             True)

        polygon_rdd = PolygonRDD(self.sc, counties_path, 2, 3,
                                 FileDataSplitter.WKT, True)

        point_rdd.analyze()
        point_rdd.spatialPartitioning(GridType.KDBTREE)
        polygon_rdd.spatialPartitioning(point_rdd.getPartitioner())
        result = JoinQuery.SpatialJoinQuery(point_rdd, polygon_rdd, True,
                                            False)

        print(result.count())
예제 #6
0
    def test_spatial_join_query(self):
        query_window_rdd = PolygonRDD(
            self.sc, polygon_rdd_input_location, polygon_rdd_start_offset, polygon_rdd_end_offset,
            polygon_rdd_splitter, True
        )
        object_rdd = PointRDD(
            self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY)

        object_rdd.spatialPartitioning(join_query_partitioning_type)
        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        object_rdd.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)
        query_window_rdd.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            result_size = JoinQuery.SpatialJoinQuery(object_rdd, query_window_rdd, False, True).count()
예제 #7
0
    def test_spatial_join_query_and_build_index_on_points_on_the_fly(self):
        query_window = PolygonRDD(self.sc, polygon_rdd_input_location,
                                  polygon_rdd_start_offset,
                                  polygon_rdd_end_offset, polygon_rdd_splitter,
                                  True)
        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(join_query_partitionin_type)
        query_window.spatialPartitioning(object_rdd.getPartitioner())

        for i in range(each_query_loop_times):
            result_size = JoinQuery.SpatialJoinQuery(object_rdd, query_window,
                                                     True, False).count()
예제 #8
0
    def test_spatial_join_query_and_build_index_on_polygons_on_the_fly(self):
        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True)

        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(join_query_partitionin_type)
        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        for i in range(each_query_loop_times):
            join_params = JoinParams(False, polygon_rdd_index_type,
                                     JoinBuildSide.LEFT)
            resultSize = JoinQuery.spatialJoin(query_window_rdd, object_rdd,
                                               join_params).count()
    def test_polygon_distance_join_with_crs_transformation(self):
        query_rdd = PolygonRDD(self.sc, input_location_query_polygon, splitter,
                               True, num_partitions, StorageLevel.MEMORY_ONLY,
                               "epsg:4326", "epsg:3857")
        window_rdd = CircleRDD(query_rdd, 0.1)

        object_rdd = PolygonRDD(self.sc, input_location_query_polygon,
                                splitter, True, num_partitions,
                                StorageLevel.MEMORY_ONLY, "epsg:4326",
                                "epsg:3857")

        object_rdd.rawJvmSpatialRDD.jsrdd.repartition(4)
        object_rdd.spatialPartitioning(GridType.KDBTREE)
        object_rdd.buildIndex(IndexType.RTREE, True)
        window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        results = JoinQuery.DistanceJoinQuery(object_rdd, window_rdd, True,
                                              False).collect()
        assert 5467 == results.__len__()

        for data in results:
            for polygon_data in data[1]:
                assert Circle(data[0].geom, 0.1).covers(polygon_data.geom)