def test_spatial_knn_correctness(self):
    """Check that the KNN query returns the same neighbours with and without an index.

    The query is run once before ``buildIndex`` (use-index flag ``False``) and
    once after it (use-index flag ``True``). Both result lists are ordered with
    ``distance_sorting_functions(geo_data, query_point)`` as the sort key, and
    the pairwise geometry distances of the first ``top_k`` entries must sum
    to zero.
    """
    def sort_key(geo_data):
        return distance_sorting_functions(geo_data, query_point)

    point_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )

    # The un-indexed query has to run before the index is built.
    plain_neighbours = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, False)
    point_rdd.buildIndex(IndexType.RTREE, False)
    indexed_neighbours = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, True)

    plain_sorted = sorted(plain_neighbours, key=sort_key)
    indexed_sorted = sorted(indexed_neighbours, key=sort_key)

    difference = sum(
        plain_sorted[rank].geom.distance(indexed_sorted[rank].geom)
        for rank in range(top_k)
    )
    assert difference == 0
def test_spatial_knn_query_using_index(self):
    """Run the KNN query against an indexed PointRDD ``self.loop_times`` times.

    Each iteration must return at least one neighbour, and the first
    neighbour must carry user data.
    """
    point_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
    point_rdd.buildIndex(IndexType.RTREE, False)

    for _ in range(self.loop_times):
        # The last argument is the use-index flag. It was False, so the index
        # built above was never exercised by this "using_index" test.
        result = KNNQuery.SpatialKnnQuery(
            point_rdd, self.query_point, self.top_k, True
        )
        # The previous check (`> -1`) could never fail; require a real result.
        assert len(result) > 0
        assert result[0].getUserData() is not None
def test_build_index_without_set_grid(self):
    """Build an ``IndexType.RTREE`` index on a PointRDD without calling
    ``spatialPartitioning`` first.

    The test has no assertions; it passes when neither the constructor nor
    ``buildIndex`` raises.
    """
    points = PointRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        Offset=offset,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    points.buildIndex(IndexType.RTREE, False)
def test_knn_query_with_index(self):
    """Run a 1000-neighbour KNN query with the use-index flag set,
    ``each_query_loop_times`` times.

    The test has no assertions; it passes when every query completes
    without raising.
    """
    points = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )
    points.buildIndex(point_rdd_index_type, False)

    for _ in range(each_query_loop_times):
        # The result is not inspected, so it is not bound to a name.
        KNNQuery.SpatialKnnQuery(points, knn_query_point, 1000, True)
def test_spatial_knn_query_using_index(self):
    """Run the KNN query against an indexed, CRS-transformed PointRDD.

    The PointRDD is constructed with source CRS ``epsg:4326`` and target CRS
    ``epsg:3005``. Each of the ``loop_times`` iterations must return at least
    one neighbour, and the first neighbour must carry user data.
    """
    point_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )
    point_rdd.buildIndex(IndexType.RTREE, False)

    for _ in range(loop_times):
        # The last argument is the use-index flag. It was False, so the index
        # built above was never exercised by this "using_index" test.
        result = KNNQuery.SpatialKnnQuery(point_rdd, query_point, top_k, True)
        assert len(result) > 0
        assert result[0].getUserData() is not None
def test_range_query_using_index(self):
    """Run an indexed range query ``each_query_loop_times`` times.

    The test has no assertions on the result size; it passes when every
    query executes without raising.
    """
    object_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )
    object_rdd.buildIndex(point_rdd_index_type, False)

    for _ in range(each_query_loop_times):
        # `.count` was previously referenced without being called, so the
        # query result was never evaluated. Calling it runs the query.
        RangeQuery.SpatialRangeQuery(
            object_rdd, range_query_window, False, True
        ).count()
def test_empty_constructor(self):
    """Check that a PointRDD created with no arguments can be analyzed once
    it is handed another RDD's ``rawJvmSpatialRDD``.

    The test has no assertions; it passes when ``analyze()`` on the copy does
    not raise.
    """
    source_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=input_location,
        Offset=offset,
        splitter=splitter,
        carryInputData=True,
        partitions=num_partitions,
        newLevel=StorageLevel.MEMORY_ONLY,
    )
    source_rdd.buildIndex(IndexType.RTREE, False)

    empty_rdd = PointRDD()
    empty_rdd.rawJvmSpatialRDD = source_rdd.rawJvmSpatialRDD
    empty_rdd.analyze()
def test_spatial_range_query_using_index(self):
    """Run the range query against an indexed PointRDD.

    Each of the ``self.loop_times`` iterations must count 2830 results for
    ``self.query_envelope``. One further query checks that the second of the
    first ten results carries user data.
    """
    spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
    spatial_rdd.buildIndex(IndexType.RTREE, False)

    for _ in range(self.loop_times):
        # The last argument is the use-index flag. It was False, so the index
        # built above was never exercised by this "using_index" test.
        result_size = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).count()
        assert result_size == 2830

    sample = RangeQuery.SpatialRangeQuery(
        spatial_rdd, self.query_envelope, False, True
    ).take(10)
    assert sample[1].getUserData() is not None
def test_crs_tranformed_spatial_range_query_using_index(self):
    """Run an indexed range query on a CRS-transformed PointRDD.

    The PointRDD is constructed with ``StorageLevel.DISK_ONLY``, source CRS
    ``epsg:4326`` and target CRS ``epsg:3005``. The query is repeated
    ``each_query_loop_times`` times. There are no assertions on the result
    size; the test passes when every query executes without raising.
    """
    object_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
        newLevel=StorageLevel.DISK_ONLY,
        sourceEpsgCRSCode="epsg:4326",
        targetEpsgCode="epsg:3005",
    )
    object_rdd.buildIndex(point_rdd_index_type, False)

    for _ in range(each_query_loop_times):
        # `.count` was previously referenced without being called, so the
        # query result was never evaluated. Calling it runs the query.
        RangeQuery.SpatialRangeQuery(
            object_rdd, range_query_window, False, True
        ).count()
def test_spatial_range_query_using_index(self):
    """Run the range query against an indexed, CRS-transformed PointRDD.

    The PointRDD is constructed with source CRS ``epsg:4326`` and target CRS
    ``epsg:3005``. Each of the ``loop_times`` iterations must count 3127
    results for ``query_envelope``. One further query checks that the second
    of the first ten results carries user data.
    """
    spatial_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )
    spatial_rdd.buildIndex(IndexType.RTREE, False)

    for _ in range(loop_times):
        # The last argument is the use-index flag. It was False, so the index
        # built above was never exercised by this "using_index" test.
        result_size = RangeQuery.SpatialRangeQuery(
            spatial_rdd, query_envelope, False, True
        ).count()
        assert result_size == 3127

    sample = RangeQuery.SpatialRangeQuery(
        spatial_rdd, query_envelope, False, True
    ).take(10)
    assert sample[1].getUserData() is not None
def test_distance_join_query_using_index(self):
    """Run an indexed distance join between a PointRDD and a CircleRDD.

    The CircleRDD is built from the PointRDD with the value 0.1. The points
    are partitioned with ``GridType.QUADTREE``, the circles reuse the same
    partitioner, and an ``IndexType.RTREE`` index is built on the partitioned
    points. The join is repeated ``each_query_loop_times`` times. There are
    no assertions on the result size; the test passes when every join
    executes without raising.
    """
    object_rdd = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )
    query_window_rdd = CircleRDD(object_rdd, 0.1)

    object_rdd.analyze()
    object_rdd.spatialPartitioning(GridType.QUADTREE)
    query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())
    object_rdd.buildIndex(IndexType.RTREE, True)

    for _ in range(each_query_loop_times):
        # `.count` was previously referenced without being called, so the
        # join result was never evaluated. Calling it runs the join.
        JoinQuery.DistanceJoinQuery(
            object_rdd, query_window_rdd, True, True
        ).count()
def test_spatial_join_using_index(self):
    """Run an indexed spatial join between a PointRDD and a PolygonRDD.

    The points are partitioned with ``join_query_partitionin_type``, the
    polygons reuse the same partitioner, and an index is built on the
    partitioned points. The join is counted ``each_query_loop_times`` times.
    There are no assertions on the count; the test passes when every join
    completes without raising.
    """
    polygons = PolygonRDD(
        self.sc,
        polygon_rdd_input_location,
        polygon_rdd_start_offset,
        polygon_rdd_end_offset,
        polygon_rdd_splitter,
        True,
    )
    points = PointRDD(
        sparkContext=self.sc,
        InputLocation=point_rdd_input_location,
        Offset=point_rdd_offset,
        splitter=point_rdd_splitter,
        carryInputData=False,
    )

    points.analyze()
    points.spatialPartitioning(join_query_partitionin_type)
    polygons.spatialPartitioning(points.getPartitioner())
    points.buildIndex(point_rdd_index_type, True)

    for _ in range(each_query_loop_times):
        # The count is not inspected, so it is not bound to a name.
        JoinQuery.SpatialJoinQuery(points, polygons, True, False).count()
def test_spatial_join_query_with_polygon_rdd_using_index(self):
    """Run an indexed spatial join of CRS-transformed points and polygons.

    Both RDDs are constructed with source CRS ``epsg:4326`` and target CRS
    ``epsg:3005``. The first element of the second result pair must carry
    user data, and so must every item in the second element of each pair.
    """
    query_rdd = PolygonRDD(
        self.sc,
        input_location_query_polygon,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )
    spatial_rdd = PointRDD(
        self.sc,
        input_location,
        offset,
        splitter,
        True,
        num_partitions,
        StorageLevel.MEMORY_ONLY,
        "epsg:4326",
        "epsg:3005",
    )

    spatial_rdd.spatialPartitioning(grid_type)
    spatial_rdd.buildIndex(IndexType.RTREE, True)
    query_rdd.spatialPartitioning(spatial_rdd.grids)

    # The third argument is the use-index flag. It was False, so the index
    # built above was never exercised by this "using_index" test.
    result = JoinQuery.SpatialJoinQuery(
        spatial_rdd, query_rdd, True, True
    ).collect()

    assert result[1][0].getUserData() is not None

    for data in result:
        if len(data[1]) == 0:
            continue
        for right_data in data[1]:
            assert right_data.getUserData() is not None