def test_overlapped_linestring_join_correctness(self):
        """Join the overlapped line strings with the polygon windows and verify both runs.

        The join is executed twice, with the third ``SpatialJoinQuery``
        argument set to ``True`` and then ``False``; each collected result is
        handed to ``self.verify_join_result``.
        """
        # NOTE(review): the sibling point-join tests call
        # ``self.once_before_all()`` first; this one does not — confirm that
        # the fixtures it reads are already initialised by then.
        windows = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set),
            StorageLevel.MEMORY_ONLY,
        )
        line_strings = LineStringRDD(
            self.sc.parallelize(self.test_overlapped_linestring_set),
            StorageLevel.MEMORY_ONLY,
        )
        self.prepare_rdd(line_strings, windows, GridType.QUADTREE)

        # Third argument is presumably the use-index switch — confirm against
        # the JoinQuery API.
        for use_index in (True, False):
            joined = JoinQuery.SpatialJoinQuery(
                line_strings, windows, use_index, True
            ).collect()
            self.verify_join_result(joined)
    def test_outside_point_join_correctness(self):
        """Join the outside-point fixture with the polygon windows and expect no pairs.

        The join is run with the third ``SpatialJoinQuery`` argument set to
        ``True`` and then ``False``; both collected results must be empty.
        """
        self.once_before_all()
        windows = PolygonRDD(
            self.sc.parallelize(self.test_polygon_window_set),
            StorageLevel.MEMORY_ONLY,
        )
        outside_points = PointRDD(
            self.sc.parallelize(self.test_outside_point_set),
            StorageLevel.MEMORY_ONLY,
        )
        self.prepare_rdd(outside_points, windows, GridType.QUADTREE)

        # Third argument is presumably the use-index switch — confirm against
        # the JoinQuery API.
        for use_index in (True, False):
            joined = JoinQuery.SpatialJoinQuery(
                outside_points, windows, use_index, False
            ).collect()
            assert len(joined) == 0
    def test_inside_point_join_correctness(self):
        """Join the inside-point fixture with the polygon windows and verify both runs.

        The join is run with the third ``SpatialJoinQuery`` argument set to
        ``True`` and then ``False``; each collected result is passed to
        ``self.verify_join_result``.
        """
        self.once_before_all()

        windows = PolygonRDD(self.sc.parallelize(self.test_polygon_window_set))
        inside_points = PointRDD(
            self.sc.parallelize(self.test_inside_point_set)
        )
        self.prepare_rdd(inside_points, windows, GridType.QUADTREE)

        # Third argument is presumably the use-index switch — confirm against
        # the JoinQuery API.
        for use_index in (True, False):
            joined = JoinQuery.SpatialJoinQuery(
                inside_points, windows, use_index, False
            ).collect()
            self.verify_join_result(joined)
예제 #4
0
    def test_nested_loop(self, num_partitions, use_legacy_apis, grid_type, intersects):
        """Run a polygon-vs-polygon join with ``False`` as the third query argument.

        Both RDDs are built with ``self.create_polygon_rdd`` and partitioned
        via ``self.partition_rdds``. The collected result is sanity-checked and
        its count compared with ``self.get_expected_count(intersects)``.
        """
        windows = self.create_polygon_rdd(
            query_polygon_set, splitter, num_partitions
        )
        polygons = self.create_polygon_rdd(
            input_location, splitter, num_partitions
        )

        self.partition_rdds(windows, polygons, grid_type, use_legacy_apis)

        joined = JoinQuery.SpatialJoinQuery(
            polygons, windows, False, intersects
        ).collect()

        self.sanity_check_join_results(joined)
        expected_total = self.get_expected_count(intersects)
        actual_total = self.count_join_results(joined)
        assert expected_total == actual_total
예제 #5
0
    def nested_loop(self, query_rdd, num_partitions, grid_type,
                    use_legacy_apis, expected_count):
        """Join a freshly created point RDD against ``query_rdd`` and check the count.

        The point RDD comes from ``self.create_point_rdd``; both RDDs are
        partitioned with ``self.partition_rdds`` before the join. The collected
        result is sanity-checked and its count must equal ``expected_count``.
        """
        points = self.create_point_rdd(
            input_location, splitter, num_partitions
        )

        self.partition_rdds(query_rdd, points, grid_type, use_legacy_apis)

        joined = JoinQuery.SpatialJoinQuery(
            points, query_rdd, False, True
        ).collect()

        self.sanity_check_join_results(joined)
        actual_total = self.count_join_results(joined)
        assert expected_count == actual_total
예제 #6
0
    def test_spatial_join_query(self):
        """Join WKT points with WKT county polygons and print the result count.

        The point RDD is analysed and KDB-tree partitioned; the polygon RDD
        reuses the point RDD's partitioner. No assertion is made on the count.
        """
        points = PointRDD(
            self.sc, point_path, 4, FileDataSplitter.WKT, True
        )
        counties = PolygonRDD(
            self.sc, counties_path, 2, 3, FileDataSplitter.WKT, True
        )

        points.analyze()
        points.spatialPartitioning(GridType.KDBTREE)
        shared_partitioner = points.getPartitioner()
        counties.spatialPartitioning(shared_partitioner)

        joined = JoinQuery.SpatialJoinQuery(points, counties, True, False)
        match_total = joined.count()

        print(match_total)
예제 #7
0
    def test_index_int(self, num_partitions, use_legacy_apis, grid_type,
                       index_type):
        """Join two rectangle RDDs after building an index on the spatial side.

        Both RDDs are created from ``input_location``. After partitioning, an
        index of ``index_type`` is built on the spatial RDD. The collected
        result is sanity-checked and its count must equal ``match_count``.
        """
        windows = self.create_rectangle_rdd(
            input_location, splitter, num_partitions
        )
        rectangles = self.create_rectangle_rdd(
            input_location, splitter, num_partitions
        )

        self.partition_rdds(windows, rectangles, grid_type, use_legacy_apis)
        rectangles.buildIndex(index_type, True)

        joined = JoinQuery.SpatialJoinQuery(
            rectangles, windows, False, True
        ).collect()

        self.sanity_check_join_results(joined)
        actual_total = self.count_join_results(joined)
        assert match_count == actual_total
예제 #8
0
    def test_nested_loop(self, num_partitions, use_legacy_apis, grid_type):
        """Join two rectangle RDDs and check the total match count.

        Both RDDs are created from ``input_location`` with
        ``self.create_rectangle_rdd`` and partitioned via
        ``self.partition_rdds``. The collected join result is sanity-checked
        and its count, as reported by ``self.count_join_results``, must equal
        ``match_count``.
        """
        query_rdd = self.create_rectangle_rdd(input_location, splitter,
                                              num_partitions)
        spatial_rdd = self.create_rectangle_rdd(input_location, splitter,
                                                num_partitions)

        self.partition_rdds(query_rdd, spatial_rdd, grid_type, use_legacy_apis)

        result = JoinQuery.SpatialJoinQuery(spatial_rdd, query_rdd, False,
                                            True).collect()

        # A hand-rolled ``count`` tally used to be accumulated here but was
        # never read; the assertion relies on ``count_join_results`` instead.
        self.sanity_check_join_results(result)
        assert match_count == self.count_join_results(result)
예제 #9
0
    def test_spatial_join_query_and_build_index_on_points_on_the_fly(self):
        """Repeatedly run a point/polygon join with ``True`` as the third query argument.

        The point RDD is analysed and partitioned with
        ``join_query_partitionin_type``; the polygon window RDD reuses its
        partitioner. The join is counted ``each_query_loop_times`` times; the
        counts are not asserted on.
        """
        windows = PolygonRDD(
            self.sc,
            polygon_rdd_input_location,
            polygon_rdd_start_offset,
            polygon_rdd_end_offset,
            polygon_rdd_splitter,
            True,
        )
        points = PointRDD(
            sparkContext=self.sc,
            InputLocation=point_rdd_input_location,
            Offset=point_rdd_offset,
            splitter=point_rdd_splitter,
            carryInputData=False,
        )
        points.analyze()
        points.spatialPartitioning(join_query_partitionin_type)
        windows.spatialPartitioning(points.getPartitioner())

        for _ in range(each_query_loop_times):
            # The count is computed but deliberately not checked here.
            JoinQuery.SpatialJoinQuery(points, windows, True, False).count()
    def test_spatial_join_query_with_polygon_rdd(self):
        """Join CRS-transformed points with polygons and check user data is present.

        Both RDDs are constructed with the ``"epsg:4326"`` / ``"epsg:3005"``
        string pair. After the join, the first item of the second result pair
        and every element in the second slot of each pair must return
        non-``None`` user data.
        """
        windows = PolygonRDD(
            self.sc, input_location_query_polygon, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY,
            "epsg:4326", "epsg:3005",
        )
        points = PointRDD(
            self.sc, input_location, offset, splitter, True,
            num_partitions, StorageLevel.MEMORY_ONLY,
            "epsg:4326", "epsg:3005",
        )
        points.spatialPartitioning(grid_type)

        windows.spatialPartitioning(points.grids)

        pairs = JoinQuery.SpatialJoinQuery(
            points, windows, False, True
        ).collect()
        assert pairs[1][0].getUserData() is not None

        for pair in pairs:
            # Skip pairs whose second slot is empty.
            if len(pair[1]) == 0:
                continue
            for matched in pair[1]:
                assert matched.getUserData() is not None