def test_spatial_knn_correctness(self):
        """Check that KNN answers agree with and without an R-tree index.

        The same query is issued once before the index is built (use-index
        flag ``False``) and once afterwards (flag ``True``). Both result lists
        are ordered with ``distance_sorting_functions`` relative to the query
        point, and the geometric distances between same-rank entries must sum
        to exactly zero.
        """
        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )

        # First pass without an index, then build it and query again.
        plain_hits = KNNQuery.SpatialKnnQuery(points, query_point, top_k, False)
        points.buildIndex(IndexType.RTREE, False)
        indexed_hits = KNNQuery.SpatialKnnQuery(points, query_point, top_k, True)

        def by_distance(geo_data):
            return distance_sorting_functions(geo_data, query_point)

        plain_sorted = sorted(plain_hits, key=by_distance)
        indexed_sorted = sorted(indexed_hits, key=by_distance)

        total_gap = sum(
            plain_sorted[rank].geom.distance(indexed_sorted[rank].geom)
            for rank in range(top_k)
        )

        assert total_gap == 0
예제 #2
0
    def test_creating_point_rdd(self):
        """Load the WKT point file and verify the de-duplicated record count."""
        points = PointRDD(
            self.spark._sc, point_path, 4, FileDataSplitter.WKT, True
        )
        points.analyze()

        cnt = points.countWithoutDuplicates()
        assert cnt == 12872, f"Point RDD should have 12872 but found {cnt}"
    def test_spatial_knn_query_using_index(self):
        """Run the KNN query through the R-tree index and expect neighbours back.

        The last argument of ``SpatialKnnQuery`` is the use-index flag. It is
        ``True`` here so that the index built just above is actually
        exercised; with ``False`` the index would be built and then ignored.
        """
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
        point_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(point_rdd, self.query_point, self.top_k, True)
            # A length can never be negative, so ``> -1`` could not fail;
            # require at least one neighbour instead.
            assert len(result) > 0
 def test_saving_to_disc_index_point(self):
     """Build an R-tree index over the points and save it as an object file.

     The index RDD is written to ``<disc_location>/point_index``.
     """
     from tests.properties.point_properties import input_location, offset, splitter, num_partitions

     indexed_points = PointRDD(
         self.sc, input_location, offset, splitter, True,
         num_partitions, StorageLevel.MEMORY_ONLY,
     )
     indexed_points.buildIndex(IndexType.RTREE, False)

     index_rdd = indexed_points.indexedRawRDD
     index_rdd.saveAsObjectFile(os.path.join(disc_location, "point_index"))
예제 #5
0
 def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
     """Load a point spatial RDD from ``path`` and wrap it in a ``PointRDD``.

     :param sc: Spark context whose ``_jvm`` and ``_jsc`` handles are used.
     :param path: location handed to ``load_point_spatial_rdd``.
     :return: an empty-constructed ``PointRDD`` with the loaded JVM RDD
         attached through ``set_srdd``.
     """
     gateway = sc._jvm
     loaded = PointRDD()
     adapter = SpatialObjectLoaderAdapter(gateway)
     loaded.set_srdd(adapter.load_point_spatial_rdd(sc._jsc, path))
     return loaded
예제 #6
0
 def test_crs_transformation_spatial_range_query_using_index(self):
     """Smoke-test an indexed range query on a point RDD built with EPSG codes.

     The assertion accepts any non-negative size, so it only verifies that
     ``count()`` completes for every iteration.
     """
     points = PointRDD(
         self.sc, point_rdd_input_location, point_rdd_offset,
         point_rdd_splitter, True, StorageLevel.MEMORY_ONLY,
         "epsg:4326", "epsg:3005",
     )
     points.buildIndex(point_rdd_index_type, False)
     points.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)

     for _ in range(each_query_loop_times):
         matches = RangeQuery.SpatialRangeQuery(points, range_query_window, False, True)
         assert matches.count() > -1
예제 #7
0
    def test_spatial_knn_query_using_index(self):
        """Smoke-test an indexed KNN query asking for 1000 neighbours."""
        points = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )
        points.buildIndex(point_rdd_index_type, False)
        points.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            # The neighbours are not inspected; the call only has to succeed.
            KNNQuery.SpatialKnnQuery(points, knn_query_point, 1000, True)
예제 #8
0
 def test_build_index_without_set_grid(self):
     """Build an R-tree index on a point RDD that was never spatially partitioned.

     There are no assertions; the test passes when ``buildIndex`` does not
     raise.
     """
     rdd_options = dict(
         sparkContext=self.sc,
         InputLocation=input_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     points = PointRDD(**rdd_options)
     points.buildIndex(IndexType.RTREE, False)
예제 #9
0
    def test_spatial_knn_query_using_index(self):
        """Run the KNN query through the R-tree index on an EPSG-configured RDD.

        The last argument of ``SpatialKnnQuery`` is the use-index flag. It is
        ``True`` here so the index built above is the code path under test;
        with ``False`` the index would be built and then ignored. Every
        iteration must return at least one neighbour, and the first one must
        carry user data.
        """
        point_rdd = PointRDD(
            self.sc, input_location, offset, splitter, True, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005"
        )
        point_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(loop_times):
            result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, True)
            assert len(result) > 0
            assert result[0].getUserData() is not None
예제 #10
0
 def test_knn_query_with_index(self):
     """Smoke-test a 1000-neighbour KNN query that goes through the index."""
     rdd_options = dict(
         sparkContext=self.sc,
         InputLocation=point_rdd_input_location,
         Offset=point_rdd_offset,
         splitter=point_rdd_splitter,
         carryInputData=False,
     )
     points = PointRDD(**rdd_options)
     points.buildIndex(point_rdd_index_type, False)

     for _ in range(each_query_loop_times):
         # The neighbours are not inspected; the call only has to succeed.
         KNNQuery.SpatialKnnQuery(points, knn_query_point, 1000, True)
    def test_spatial_join_query_with_polygon_rdd_using_index(self):
        """Join indexed points against query polygons and check user data is kept.

        The points are partitioned with ``grid_type`` and indexed, and the
        polygons reuse the points' partitioner. After the join, the key of
        the second result pair and every matched geometry must carry
        non-``None`` user data.
        """
        polygons = PolygonRDD(
            self.sc, input_location_query_polygon, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )
        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY, "epsg:4326", "epsg:3005",
        )

        polygons.analyze()
        points.analyze()

        points.spatialPartitioning(grid_type)
        points.buildIndex(IndexType.RTREE, True)
        polygons.spatialPartitioning(points.getPartitioner())

        join_rdd = JoinQuery.SpatialJoinQuery(points, polygons, False, True)
        joined = join_rdd.collect()

        assert joined[1][0].getUserData() is not None

        for pair in joined:
            if pair[1].__len__() == 0:
                continue
            for matched in pair[1]:
                assert matched.getUserData() is not None
    def test_spatial_range_query_using_index(self):
        """Run the range query through the R-tree index and expect 2830 hits.

        The fourth argument of ``SpatialRangeQuery`` is the use-index flag. It
        is ``True`` here so the index built above is actually used; with
        ``False`` the index would be built and then ignored.
        """
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                               False)

        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True).count()
            assert result_size == 2830
예제 #13
0
 def test_range_query_using_index(self):
     """Smoke-test an indexed range query over the point RDD.

     ``count`` must be called, not merely referenced: without the
     parentheses only the bound method is stored and the Spark action is
     never triggered, so the query would not execute. The size itself is
     not checked; the test passes when every query runs without raising.
     """
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
     object_rdd.buildIndex(point_rdd_index_type, False)
     for _ in range(each_query_loop_times):
         RangeQuery.SpatialRangeQuery(object_rdd,
                                      range_query_window,
                                      False, True).count()
예제 #14
0
    def test_crs_transform(self):
        """Transform the CRS test points to epsg:3857 and check the first geometry."""
        rdd_options = dict(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        points = PointRDD(**rdd_options)

        points.CRSTransform("epsg:4326", "epsg:3857")

        first_point = points.rawSpatialRDD.collect()[0]
        assert first_point.geom.wkt == "POINT (-9833016.710450118 3805934.914254189)"
예제 #15
0
    def readToPointRDD(cls, sc: SparkContext, inputPath: str) -> PointRDD:
        """Read a shapefile through the JVM ``ShapefileReader`` into a ``PointRDD``.

        :param sc: Spark context whose ``_jvm`` and ``_jsc`` handles are used.
        :param inputPath: path forwarded to ``ShapefileReader.readToPointRDD``.
        :return: an empty-constructed ``PointRDD`` with the JVM result attached
            through ``set_srdd``.
        """
        reader = sc._jvm.ShapefileReader
        jvm_rdd = reader.readToPointRDD(sc._jsc, inputPath)

        wrapped = PointRDD()
        wrapped.set_srdd(jvm_rdd)
        return wrapped
예제 #16
0
    def test_get_crs_transformation(self):
        """``getCRStransformation`` is falsy before ``CRSTransform`` and truthy after."""
        rdd_options = dict(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        points = PointRDD(**rdd_options)

        flag_before = points.getCRStransformation()
        assert not flag_before

        points.CRSTransform("epsg:4326", "epsg:3857")

        flag_after = points.getCRStransformation()
        assert flag_after
예제 #17
0
    def test_get_source_epsg_code(self):
        """The source EPSG code is empty until ``CRSTransform`` records it."""
        rdd_options = dict(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        points = PointRDD(**rdd_options)

        code_before = points.getSourceEpsgCode()
        assert code_before == ""

        points.CRSTransform("epsg:4326", "epsg:3857")

        code_after = points.getSourceEpsgCode()
        assert code_after == "epsg:4326"
예제 #18
0
    def test_spatial_range_query(self):
        """Smoke-test the un-indexed range query on two freshly loaded point RDDs.

        The identical scenario runs twice: load the points, persist the raw
        JVM RDD, then count the range-query result ``each_query_loop_times``
        times. The counts are not checked.
        """
        for _round in range(2):
            points = PointRDD(
                self.sc,
                point_rdd_input_location,
                point_rdd_offset,
                point_rdd_splitter,
                True,
                StorageLevel.MEMORY_ONLY,
            )
            points.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)

            for _ in range(each_query_loop_times):
                RangeQuery.SpatialRangeQuery(points, range_query_window, False, False).count()
예제 #19
0
 def test_crs_tranformed_spatial_range_query_using_index(self):
     """Smoke-test an indexed range query on a point RDD built with EPSG codes.

     ``count`` must be called, not merely referenced: without the
     parentheses only the bound method is stored and the Spark action is
     never triggered, so the query would not execute. The size itself is
     not checked; the test passes when every query runs without raising.
     """
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False,
                           newLevel=StorageLevel.DISK_ONLY,
                           sourceEpsgCRSCode="epsg:4326",
                           targetEpsgCode="epsg:3005")
     object_rdd.buildIndex(point_rdd_index_type, False)
     for _ in range(each_query_loop_times):
         RangeQuery.SpatialRangeQuery(object_rdd,
                                      range_query_window,
                                      False, True).count()
    def test_point_rdd(self):
        """Check the coordinates of the first four points read from the input file."""
        rdd_options = dict(
            sparkContext=self.sc,
            InputLocation=point_rdd_input_location,
            Offset=point_rdd_offset,
            splitter=point_rdd_splitter,
            carryInputData=False,
        )
        points = PointRDD(**rdd_options)

        collected = points.getRawSpatialRDD().collect()

        expected_xy = [
            [-88.331492, 32.324142],
            [-88.175933, 32.360763],
            [-88.388954, 32.357073],
            [-88.221102, 32.35078],
        ]
        actual_xy = [[item.geom.x, item.geom.y] for item in collected[:4]]

        assert actual_xy == expected_xy[:4]
    def test_spatial_range_query_using_index(self):
        """Run the range query through the R-tree index on an EPSG-configured RDD.

        The fourth argument of ``SpatialRangeQuery`` is the use-index flag. It
        is ``True`` in both queries so the index built above is the code path
        under test; with ``False`` the index would be built and then ignored.
        Each run must return 3127 records, and the second of the first ten
        records must carry user data.
        """
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                               StorageLevel.MEMORY_ONLY, "epsg:4326",
                               "epsg:3005")
        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(loop_times):
            result_size = RangeQuery.SpatialRangeQuery(spatial_rdd,
                                                       query_envelope, False,
                                                       True).count()
            assert result_size == 3127

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, query_envelope, False,
            True).take(10)[1].getUserData() is not None
예제 #22
0
    def test_distance_join_query(self):
        """Smoke-test a distance join between the points and a ``CircleRDD`` built from them.

        The circles are created from the point RDD with ``0.1`` as the second
        constructor argument. Both sides share the points' QUADTREE
        partitioner and are persisted in memory before the join runs. Only
        the cached flag of the partitioned point RDD is asserted; the join
        counts are not checked.
        """
        points = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )
        circles = CircleRDD(points, 0.1)

        points.spatialPartitioning(GridType.QUADTREE)
        circles.spatialPartitioning(points.getPartitioner())

        points.spatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)
        assert points.spatialPartitionedRDD.is_cached

        circles.spatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            JoinQuery.DistanceJoinQuery(points, circles, False, True).count()
예제 #23
0
 def test_empty_constructor(self):
     """An empty ``PointRDD`` can adopt another RDD's raw JVM RDD and be analyzed.

     There are no assertions; the test passes when ``analyze`` on the copy
     does not raise.
     """
     rdd_options = dict(
         sparkContext=self.sc,
         InputLocation=input_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     source_rdd = PointRDD(**rdd_options)
     source_rdd.buildIndex(IndexType.RTREE, False)

     clone = PointRDD()
     clone.rawJvmSpatialRDD = source_rdd.rawJvmSpatialRDD
     clone.analyze()
예제 #24
0
    def test_spatial_join_query(self):
        """Smoke-test a spatial join between the points and the query polygons.

        The points are partitioned with ``join_query_partitioning_type`` and
        the polygons reuse that partitioner. Both partitioned JVM RDDs are
        persisted in memory before the join is counted repeatedly. The counts
        are not checked.
        """
        polygons = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
        )
        points = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )

        points.spatialPartitioning(join_query_partitioning_type)
        polygons.spatialPartitioning(points.getPartitioner())

        points.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)
        polygons.jvmSpatialPartitionedRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            JoinQuery.SpatialJoinQuery(points, polygons, False, True).count()
    def test_point_rdd(self):
        """Turn a point RDD into DataFrames, with and without an explicit schema.

        Each record becomes a row made of its geometry followed by the
        tab-separated pieces of its user data. The row RDD is shown once with
        an inferred schema and once with a ``geom``/``name`` schema, and the
        first geometry of the typed DataFrame is checked by its WKT.
        """
        rdd_options = dict(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        points = PointRDD(**rdd_options)

        rows = points.rawSpatialRDD.map(
            lambda record: [record.geom, *record.getUserData().split("\t")])

        inferred_df = self.spark.createDataFrame(rows)
        inferred_df.show()

        fields = [
            StructField("geom", GeometryType()),
            StructField("name", StringType()),
        ]
        schema = StructType(fields)

        typed_df = self.spark.createDataFrame(rows, schema, verifySchema=False)
        typed_df.show()

        first_row = typed_df.take(1)[0]
        assert first_row[0].wkt == "POINT (32.324142 -88.331492)"
예제 #26
0
    def test_spatial_knn_query(self):
        """Run the un-indexed KNN query and check the nearest hit carries user data.

        The result must be non-empty. The former ``> -1`` length check could
        never fail, so an empty result surfaced as an ``IndexError`` on
        ``result[0]`` instead of a clear assertion failure.
        """
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(point_rdd, self.query_point,
                                              self.top_k, False)
            assert len(result) > 0
            assert result[0].getUserData() is not None
예제 #27
0
    def test_spatial_knn_query(self):
        """Smoke-test an un-indexed KNN query asking for 1000 neighbours."""
        points = PointRDD(
            self.sc,
            point_rdd_input_location,
            point_rdd_offset,
            point_rdd_splitter,
            True,
            StorageLevel.MEMORY_ONLY,
        )
        points.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            # The neighbours are not inspected; the call only has to succeed.
            KNNQuery.SpatialKnnQuery(points, knn_query_point, 1000, False)
    def test_saving_to_disc_spatial_rdd_point(self):
        """Save the raw JVM point RDD as an object file.

        The RDD is written to ``<disc_location>/point``.
        """
        from tests.properties.point_properties import input_location, offset, splitter, num_partitions

        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY,
        )

        raw_rdd = points.rawJvmSpatialRDD
        raw_rdd.saveAsObjectFile(os.path.join(disc_location, "point"))
예제 #29
0
 def create_spatial_rdd(self):
     """Build and return the ``PointRDD`` fixture read from ``input_file_location``."""
     return PointRDD(
         sparkContext=self.sc,
         InputLocation=input_file_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=numPartitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
예제 #30
0
 def test_spatial_range_query(self):
     """Run the un-indexed range query and check both its size and user data.

     Each of the ``loop_times`` runs must return 2830 records. One further
     run checks that the second of the first ten records carries user data.
     """
     points = PointRDD(self.sc, input_location, offset, splitter, False)

     def run_query():
         return RangeQuery.SpatialRangeQuery(
             points, self.query_envelope, False, False)

     for _ in range(self.loop_times):
         hit_count = run_query().count()
         assert hit_count == 2830

     second_hit = run_query().take(10)[1]
     assert second_hit.getUserData() is not None