Example #1
    def test_creating_point_rdd(self):
        # Build a PointRDD from WKT input, carrying the raw input line with each geometry.
        point_rdd = PointRDD(self.spark._sc, point_path, 4,
                             FileDataSplitter.WKT, True)

        # analyze() scans the data to collect the boundary envelope and count statistics.
        point_rdd.analyze()
        cnt = point_rdd.countWithoutDuplicates()
        assert cnt == 12872, f"PointRDD should contain 12872 points but found {cnt}"
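Example #1 assumes a test class that already provides self.spark, plus module-level point_path and the PointRDD/FileDataSplitter imports. A minimal standalone sketch of that setup, assuming the legacy GeoSpark Python package (geospark; in current Apache Sedona the same classes live under sedona.*) and a placeholder input path, might look like:

    from pyspark.sql import SparkSession
    from geospark.core.SpatialRDD import PointRDD
    from geospark.core.enums import FileDataSplitter
    from geospark.utils import GeoSparkKryoRegistrator, KryoSerializer

    # Kryo serialization is the configuration GeoSpark/Sedona recommend for spatial RDDs.
    spark = (SparkSession.builder
             .master("local[*]")
             .appName("point-rdd-example")
             .config("spark.serializer", KryoSerializer.getName)
             .config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName)
             .getOrCreate())

    point_path = "/path/to/points.wkt"  # placeholder input location

    # Same constructor call as in the test above, using the session's SparkContext directly.
    point_rdd = PointRDD(spark.sparkContext, point_path, 4, FileDataSplitter.WKT, True)
    point_rdd.analyze()
    print(point_rdd.countWithoutDuplicates())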
Example #2
    def test_equal_partitioning(self):
        spatial_rdd = PointRDD(sparkContext=self.sc,
                               InputLocation=input_location,
                               Offset=offset,
                               splitter=splitter,
                               carryInputData=False,
                               partitions=10,
                               newLevel=StorageLevel.MEMORY_ONLY)
        spatial_rdd.analyze()
        # Repartition the RDD over a uniform (equal-sized) grid.
        spatial_rdd.spatialPartitioning(GridType.EQUALGRID)

        for envelope in spatial_rdd.grids:
            print("PointRDD spatial partitioning grids: " + str(envelope))

        # Spatial partitioning must preserve the de-duplicated record count.
        assert (spatial_rdd.countWithoutDuplicates()
                == spatial_rdd.countWithoutDuplicatesSPRDD())
Example #3
    def test_r_tree_spatial_partitioning(self):
        spatial_rdd = PointRDD(sparkContext=self.sc,
                               InputLocation=input_location,
                               Offset=offset,
                               splitter=splitter,
                               carryInputData=True,
                               partitions=10,
                               newLevel=StorageLevel.MEMORY_ONLY)
        spatial_rdd.analyze()
        # Repartition the RDD using grids derived from an R-tree built on a data sample.
        spatial_rdd.spatialPartitioning(GridType.RTREE)

        for envelope in spatial_rdd.grids:
            print(envelope)

        # As with the equal grid, partitioning must not change the de-duplicated count.
        assert (spatial_rdd.countWithoutDuplicates()
                == spatial_rdd.countWithoutDuplicatesSPRDD())
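Examples #2 and #3 likewise rely on a shared fixture that provides self.sc together with module-level input_location, offset, and splitter values. A minimal sketch of such a fixture, again assuming the legacy GeoSpark Python package and placeholder test data, could be:

    from pyspark import StorageLevel
    from pyspark.sql import SparkSession
    from geospark.core.SpatialRDD import PointRDD
    from geospark.core.enums import FileDataSplitter, GridType
    from geospark.utils import GeoSparkKryoRegistrator, KryoSerializer

    # Placeholder test data: a CSV file of points whose coordinates start at column 0.
    input_location = "/path/to/points.csv"
    offset = 0
    splitter = FileDataSplitter.CSV


    class TestPointRDDPartitioning:
        # One Spark session (and SparkContext) shared by all tests in the class.
        spark = (SparkSession.builder
                 .master("local[*]")
                 .appName("point-rdd-partitioning-tests")
                 .config("spark.serializer", KryoSerializer.getName)
                 .config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName)
                 .getOrCreate())
        sc = spark.sparkContext

Note that GridType.EQUALGRID and GridType.RTREE come from the GeoSpark-era API; newer Apache Sedona releases appear to narrow GridType to options such as KDBTREE and QUADTREE, so these two tests may not run unchanged there.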