def test_empty_constructor_test(self):
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
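     # An empty PointRDD holds no data; assigning another RDD's rawJvmSpatialRDD
     # and calling analyze() lets it recompute its own envelope and count.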
     object_rdd_copy = PointRDD()
     object_rdd_copy.rawJvmSpatialRDD = object_rdd.rawJvmSpatialRDD
     object_rdd_copy.analyze()
 def test_empty_constructor(self):
     spatial_rdd = PointRDD(sparkContext=self.sc,
                            InputLocation=input_location,
                            Offset=offset,
                            splitter=splitter,
                            carryInputData=True,
                            partitions=num_partitions,
                            newLevel=StorageLevel.MEMORY_ONLY)
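     # buildIndex(index_type, build_on_spatial_partitioned_rdd): passing False
     # builds the R-tree on the raw RDD, since no spatial partitioning was applied.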
     spatial_rdd.buildIndex(IndexType.RTREE, False)
     spatial_rdd_copy = PointRDD()
     spatial_rdd_copy.rawJvmSpatialRDD = spatial_rdd.rawJvmSpatialRDD
     spatial_rdd_copy.analyze()
    def test_creating_point_rdd(self):
        point_rdd = PointRDD(self.spark._sc, point_path, 4,
                             FileDataSplitter.WKT, True)

        point_rdd.analyze()
        cnt = point_rdd.countWithoutDuplicates()
        assert cnt == 12872, f"Point RDD should contain 12872 points but found {cnt}"
Example #4
    def test_spatial_knn_query(self):
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        for i in range(self.loop_times):
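            # SpatialKnnQuery(rdd, query_point, k, use_index); the final False
            # answers the query by scanning rather than using a spatial index.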
            result = KNNQuery.SpatialKnnQuery(point_rdd, self.query_point, self.top_k, False)
            assert len(result) > 0
            assert result[0].getUserData() is not None
Example #5
 def load(cls, sc: SparkContext, path: str) -> SpatialRDD:
     jvm = sc._jvm
     point_rdd = PointRDD()
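     # Delegate the actual load to the JVM-side SpatialObjectLoaderAdapter and
     # wrap the returned Java spatial RDD in the empty Python PointRDD.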
     srdd = SpatialObjectLoaderAdapter(jvm).load_point_spatial_rdd(
         sc._jsc, path)
     point_rdd.set_srdd(srdd)
     return point_rdd
    def test_spatial_knn_correctness(self):
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                             StorageLevel.MEMORY_ONLY, "epsg:4326",
                             "epsg:3005")
        result_no_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point,
                                                   top_k, False)
        point_rdd.buildIndex(IndexType.RTREE, False)
        result_with_index = KNNQuery.SpatialKnnQuery(point_rdd, query_point,
                                                     top_k, True)

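        # Sort both result lists by distance to the query point so the indexed
        # and non-indexed answers can be compared pairwise.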
        sorted_result_no_index = sorted(
            result_no_index,
            key=lambda geo_data: distance_sorting_functions(
                geo_data, query_point))

        sorted_result_with_index = sorted(
            result_with_index,
            key=lambda geo_data: distance_sorting_functions(
                geo_data, query_point))

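        # The summed pairwise distance must be zero: the index must not change
        # which geometries the query returns.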
        difference = 0
        for x in range(top_k):
            difference += sorted_result_no_index[x].geom.distance(
                sorted_result_with_index[x].geom)

        assert difference == 0
    def test_raw_spatial_rdd_assignment(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                               num_partitions, StorageLevel.MEMORY_ONLY)
        spatial_rdd.analyze()

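        # rawSpatialRDD (the Python-side RDD of geometries) can be reassigned in
        # the same way; analyze() rebuilds the metadata of the empty RDD.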
        empty_point_rdd = PointRDD()
        empty_point_rdd.rawSpatialRDD = spatial_rdd.rawSpatialRDD
        empty_point_rdd.analyze()
        assert (empty_point_rdd.countWithoutDuplicates() ==
                spatial_rdd.countWithoutDuplicates())
        assert empty_point_rdd.boundaryEnvelope == spatial_rdd.boundaryEnvelope

        assert empty_point_rdd.rawSpatialRDD.map(
            lambda x: x.geom.area).collect()[0] == 0.0
        assert (empty_point_rdd.rawSpatialRDD.take(9)[4].getUserData() ==
                "testattribute0\ttestattribute1\ttestattribute2")
    def test_point_rdd(self):
        spatial_rdd = PointRDD(sparkContext=self.sc,
                               InputLocation=crs_test_point,
                               Offset=0,
                               splitter=splitter,
                               carryInputData=True,
                               partitions=numPartitions,
                               newLevel=StorageLevel.MEMORY_ONLY)

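        # Flatten each record into [geometry, attribute, ...] by splitting the
        # tab-separated user data carried alongside the geometry.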
        raw_spatial_rdd = spatial_rdd.rawSpatialRDD.map(
            lambda x: [x.geom, *x.getUserData().split("\t")])

        self.spark.createDataFrame(raw_spatial_rdd).show()

        schema = StructType([
            StructField("geom", GeometryType()),
            StructField("name", StringType())
        ])

        spatial_rdd_with_schema = self.spark.createDataFrame(
            raw_spatial_rdd, schema, verifySchema=False)

        spatial_rdd_with_schema.show()

        assert (spatial_rdd_with_schema.take(1)[0][0].wkt ==
                "POINT (32.324142 -88.331492)")
Example #9
 def test_saving_to_disc_index_point(self, remove_spatial_rdd_disc_dir):
     from tests.properties.point_properties import input_location, offset, splitter, num_partitions
     point_rdd = PointRDD(
         self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY
     )
     point_rdd.buildIndex(IndexType.RTREE, False)
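     # indexedRawRDD holds the index built on the raw RDD; saveAsObjectFile
     # serializes it to disk so it can be reloaded later.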
     point_rdd.indexedRawRDD.saveAsObjectFile(os.path.join(disc_object_location, "point_index"))
Example #10
    def test_saving_to_disc_spatial_rdd_point(self, remove_spatial_rdd_disc_dir):
        from tests.properties.point_properties import input_location, offset, splitter, num_partitions

        point_rdd = PointRDD(
            self.sc, input_location, offset, splitter, True, num_partitions, StorageLevel.MEMORY_ONLY
        )

        point_rdd.rawJvmSpatialRDD.saveAsObjectFile(os.path.join(disc_object_location, "point"))
Example #11
    def test_spatial_knn_query(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY)
        object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)

        for _ in range(each_query_loop_times):
            result = KNNQuery.SpatialKnnQuery(object_rdd, knn_query_point,
                                              1000, False)
Example #12
 def test_knn_query(self):
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
     for i in range(each_query_loop_times):
         result = KNNQuery.SpatialKnnQuery(object_rdd, knn_query_point,
                                           1000, False)
Example #13
    def test_spatial_range_query(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY)
        object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)
        for _ in range(each_query_loop_times):
            result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                       range_query_window,
                                                       False, False).count()

        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY)
        object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY)
        for _ in range(each_query_loop_times):
            result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                       range_query_window,
                                                       False, False).count()
Example #14
 def test_circle_rdd(self):
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
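     # CircleRDD buffers every point into a circle of radius 0.1, expressed in
     # the units of the data's coordinate system.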
     circle_rdd = CircleRDD(object_rdd, 0.1)
     collected_data = circle_rdd.getRawSpatialRDD().collect()
     print([geo_data.geom.wkt for geo_data in collected_data])
 def test_build_index_without_set_grid(self):
     spatial_rdd = PointRDD(sparkContext=self.sc,
                            InputLocation=input_location,
                            Offset=offset,
                            splitter=splitter,
                            carryInputData=True,
                            partitions=num_partitions,
                            newLevel=StorageLevel.MEMORY_ONLY)
     spatial_rdd.buildIndex(IndexType.RTREE, False)
Example #16
    def test_indexed_rdd_assignment(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True)
        query_window_rdd = CircleRDD(object_rdd, 0.1)
        object_rdd.analyze()
        object_rdd.spatialPartitioning(GridType.QUADTREE)
        object_rdd.buildIndex(IndexType.QUADTREE, True)

        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        object_rdd.buildIndex(IndexType.RTREE, False)

        object_rdd.indexedRDD.persist(StorageLevel.MEMORY_ONLY)
        query_window_rdd.jvmSpatialPartitionedRDD.persist(
            StorageLevel.MEMORY_ONLY)
        query_window_rdd.jvmSpatialPartitionedRDD.count()
        object_rdd.indexedRDD.count()

        import time

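        # First pass: time the distance join with the indexed object RDD and the
        # partitioned query window RDD persisted in memory.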
        start = time.time()
        for _ in range(each_query_loop_times):
            result_size = JoinQuery.DistanceJoinQuery(object_rdd,
                                                      query_window_rdd, True,
                                                      True).count()
        diff = time.time() - start

        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True)
        query_window_rdd = CircleRDD(object_rdd, 0.1)

        object_rdd.analyze()
        object_rdd.spatialPartitioning(GridType.QUADTREE)
        object_rdd.buildIndex(IndexType.QUADTREE, True)

        query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())

        object_rdd.buildIndex(IndexType.RTREE, False)

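        # Second pass: the same join without persisting, for comparison.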
        start1 = time.time()
        for _ in range(each_query_loop_times):
            result_size = JoinQuery.DistanceJoinQuery(object_rdd,
                                                      query_window_rdd, True,
                                                      True).count()
Example #17
    def test_spatial_knn_query_using_index(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY)
        object_rdd.buildIndex(point_rdd_index_type, False)
        object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)

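        # With the last argument set to True, the KNN query uses the index that
        # was built and persisted above instead of scanning the raw RDD.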
        for _ in range(each_query_loop_times):
            result = KNNQuery.SpatialKnnQuery(object_rdd, knn_query_point,
                                              1000, True)
    def test_spatial_knn_query(self):
        point_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                             StorageLevel.MEMORY_ONLY, "epsg:4326",
                             "epsg:3005")

        for i in range(loop_times):
            result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k,
                                              False)
            assert len(result) > 0
            assert result[0].getUserData() is not None
Example #19
 def test_spatial_range_query(self):
     spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                            False)
     for i in range(self.loop_times):
         result_size = RangeQuery.SpatialRangeQuery(
             spatial_rdd, self.query_envelope, False, False).count()
         assert result_size == 2830
     assert RangeQuery.SpatialRangeQuery(
         spatial_rdd, self.query_envelope, False,
         False).take(10)[1].getUserData() is not None
Example #20
 def test_range_query_using_index(self):
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False)
     object_rdd.buildIndex(point_rdd_index_type, False)
     for i in range(each_query_loop_times):
         result_size = RangeQuery.SpatialRangeQuery(
             object_rdd, range_query_window, False, True).count()
Example #21
 def create_spatial_rdd(self):
     spatial_rdd = PointRDD(
         sparkContext=self.sc,
         InputLocation=input_file_location,
         Offset=offset,
         splitter=splitter,
         carryInputData=True,
         partitions=numPartitions,
         newLevel=StorageLevel.MEMORY_ONLY
     )
     return spatial_rdd
Example #22
    def test_save_as_geo_json_with_data(self, remove_wkb_directory):
        spatial_rdd = PointRDD(sparkContext=self.sc,
                               InputLocation=inputLocation,
                               Offset=offset,
                               splitter=splitter,
                               carryInputData=True,
                               partitions=numPartitions,
                               newLevel=StorageLevel.MEMORY_ONLY)

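        # Write the RDD out as GeoJSON, read it back with the GEOJSON splitter,
        # and check that no records were lost on the round trip.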
        spatial_rdd.saveAsGeoJSON(test_save_as_wkb_with_data)

        result_wkb = PointRDD(sparkContext=self.sc,
                              InputLocation=test_save_as_wkb_with_data,
                              splitter=FileDataSplitter.GEOJSON,
                              carryInputData=True,
                              partitions=numPartitions,
                              newLevel=StorageLevel.MEMORY_ONLY)

        assert (result_wkb.rawSpatialRDD.count() ==
                spatial_rdd.rawSpatialRDD.count())
Example #23
    def test_spatial_range_query_using_index(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY)
        object_rdd.buildIndex(point_rdd_index_type, False)
        object_rdd.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY)
        assert object_rdd.indexedRawRDD.is_cached

        for _ in range(each_query_loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                object_rdd, range_query_window, False, True).count()
Example #24
    def test_spatial_range_query(self):
        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)

        for i in range(each_query_loop_times):
            result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                       range_query_window,
                                                       False, False).count()
            logging.info(result_size)
Example #25
 def test_crs_transformed_spatial_range_query(self):
     object_rdd = PointRDD(sparkContext=self.sc,
                           InputLocation=point_rdd_input_location,
                           Offset=point_rdd_offset,
                           splitter=point_rdd_splitter,
                           carryInputData=False,
                           newLevel=StorageLevel.DISK_ONLY,
                           sourceEpsgCRSCode="epsg:4326",
                           targetEpsgCode="epsg:3005")
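     # sourceEpsgCRSCode/targetEpsgCode reproject the geometries from EPSG:4326
     # to EPSG:3005 while the data is being loaded.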
     for i in range(each_query_loop_times):
         result_size = RangeQuery.SpatialRangeQuery(
             object_rdd, range_query_window, False, False).count()
Example #26
    def test_crs_transformation_spatial_range_query(self):
        object_rdd = PointRDD(self.sc, point_rdd_input_location,
                              point_rdd_offset, point_rdd_splitter, True,
                              StorageLevel.MEMORY_ONLY, "epsg:4326",
                              "epsg:3005")

        object_rdd.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY)
        assert object_rdd.rawSpatialRDD.is_cached
        for _ in range(each_query_loop_times):
            result_size = RangeQuery.SpatialRangeQuery(object_rdd,
                                                       range_query_window,
                                                       False, False).count()
            assert result_size > -1
    def test_spatial_range_query(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, True,
                               StorageLevel.MEMORY_ONLY, "epsg:4326",
                               "epsg:3005")

        for i in range(loop_times):
            result_size = RangeQuery.SpatialRangeQuery(spatial_rdd,
                                                       query_envelope, False,
                                                       False).count()
            assert result_size == 3127

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, query_envelope, False,
            False).take(10)[1].getUserData() is not None
Example #28
    def test_point_rdd(self):
        point_rdd = PointRDD(sparkContext=self.sc,
                             InputLocation=point_rdd_input_location,
                             Offset=point_rdd_offset,
                             splitter=point_rdd_splitter,
                             carryInputData=False)

        collected_points = point_rdd.getRawSpatialRDD().collect()

        points_coordinates = [[-88.331492, 32.324142], [-88.175933, 32.360763],
                              [-88.388954, 32.357073], [-88.221102, 32.35078]]

        assert [[geo_data.geom.x, geo_data.geom.y]
                for geo_data in collected_points[:4]] == points_coordinates[:4]
Example #29
    def test_crs_transform(self):
        spatial_rdd = PointRDD(
            sparkContext=self.sc,
            InputLocation=crs_test_point,
            Offset=0,
            splitter=splitter,
            carryInputData=True,
            partitions=numPartitions,
            newLevel=StorageLevel.MEMORY_ONLY
        )

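        # CRSTransform reprojects the RDD in place, here from WGS84 lon/lat
        # (EPSG:4326) to Web Mercator (EPSG:3857).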
        spatial_rdd.CRSTransform("epsg:4326", "epsg:3857")

        assert spatial_rdd.rawSpatialRDD.collect()[0].geom.wkt == "POINT (-9833016.710450118 3805934.914254189)"
    def test_spatial_join_query(self):
        point_rdd = PointRDD(self.sc, point_path, 4, FileDataSplitter.WKT,
                             True)

        polygon_rdd = PolygonRDD(self.sc, counties_path, 2, 3,
                                 FileDataSplitter.WKT, True)

        point_rdd.analyze()
        point_rdd.spatialPartitioning(GridType.KDBTREE)
        polygon_rdd.spatialPartitioning(point_rdd.getPartitioner())
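        # Both RDDs must be partitioned by the same partitioner before the join;
        # the two booleans toggle index use and boundary-intersection handling.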
        result = JoinQuery.SpatialJoinQuery(point_rdd, polygon_rdd, True,
                                            False)

        print(result.count())