Example #1
0
 def test_load_id_column_data_check(self):
     """Load the GeoJSON fixture at ``geojson_id_input_location`` into a DataFrame.

     The resulting DataFrame is expected to have exactly four columns and
     exactly one row.
     """
     spatial_rdd = PolygonRDD(self.spark.sparkContext, geojson_id_input_location, FileDataSplitter.GEOJSON, True)
     spatial_rdd.analyze()
     df = Adapter.toDf(spatial_rdd, self.spark)
     df.show()
     # Use the len() builtin instead of invoking the __len__ dunder directly.
     assert len(df.columns) == 4
     assert df.count() == 1
Example #2
0
    def test_creating_polygon_rdd(self):
        """Build a PolygonRDD from the WKT file at ``counties_path``.

        After analysis the RDD is expected to contain 407 records once
        duplicates are removed.
        """
        # Go through the public ``sparkContext`` accessor rather than the
        # private ``_sc`` attribute of the session.
        # The positional 2 and 3 are presumably the geometry start/end
        # column offsets -- TODO confirm against the PolygonRDD signature.
        polygon_rdd = PolygonRDD(self.spark.sparkContext, counties_path, 2, 3,
                                 FileDataSplitter.WKT, True)

        polygon_rdd.analyze()

        cnt = polygon_rdd.countWithoutDuplicates()

        assert cnt == 407, f"Polygon RDD should have 407 but found {cnt}"
Example #3
0
 def test_build_index_without_set_grid(self):
     """Build an R-tree index on a PolygonRDD with no spatial partitioning step.

     The RDD is only analyzed before ``buildIndex`` is called; the test
     passes as long as nothing raises.
     """
     polygons = PolygonRDD(
         self.sc,
         input_location,
         FileDataSplitter.CSV,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     polygons.analyze()
     # NOTE(review): the False flag presumably selects indexing of the raw
     # (non-partitioned) RDD -- confirm against the buildIndex documentation.
     polygons.buildIndex(IndexType.RTREE, False)
Example #4
0
    def test_geojson_to_dataframe(self):
        """Convert a GeoJSON-backed PolygonRDD to a DataFrame.

        The ``geometry`` column is rebuilt through ``ST_GeomFromWKT`` and the
        second column of the result is expected to be ``STATEFP``.
        """
        polygons = PolygonRDD(
            self.spark.sparkContext,
            geojson_input_location,
            FileDataSplitter.GEOJSON,
            True,
        )
        polygons.analyze()

        raw_df = Adapter.toDf(polygons, self.spark)
        # Replace the geometry column with the result of the SQL expression.
        df = raw_df.withColumn("geometry", expr("ST_GeomFromWKT(geometry)"))
        df.show()

        assert df.columns[1] == "STATEFP"
Example #5
0
 def test_wkb_constructor(self):
     """Construct a PolygonRDD from the WKB fixture and check its contents.

     Checks the approximate record count, that a boundary envelope exists,
     and the tab-separated user data carried by the first record.
     """
     polygons = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location_wkb,
         splitter=FileDataSplitter.WKB,
         carryInputData=True,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     polygons.analyze()

     assert polygons.approximateTotalCount == 103
     assert polygons.boundaryEnvelope is not None

     first_record = polygons.rawSpatialRDD.take(1)[0]
     expected_user_data = "31\t039\t00835841\t31039\tCuming\tCuming County\t06\tH1\tG4020\t\t\t\tA\t1477895811\t10447360\t+41.9158651\t-096.7885168"
     assert first_record.getUserData() == expected_user_data
Example #6
0
    def test_voronoi_spatial_partitioning(self):
        """Partition a CSV-backed PolygonRDD with ``GridType.VORONOI``.

        Every entry of the resulting ``grids`` collection is printed; the
        test passes as long as nothing raises.
        """
        polygons = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=FileDataSplitter.CSV,
            carryInputData=True,
            partitions=10,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        polygons.analyze()
        polygons.spatialPartitioning(GridType.VORONOI)

        for grid_cell in polygons.grids:
            print(grid_cell)
Example #7
0
    def test_hilbert_curve_spatial_partitioning(self):
        """Partition a PolygonRDD with ``GridType.HILBERT``.

        Every entry of the resulting ``grids`` collection is printed; the
        test passes as long as nothing raises.
        """
        polygons = PolygonRDD(
            sparkContext=self.sc,
            InputLocation=input_location,
            splitter=splitter,
            carryInputData=True,
            partitions=10,
            newLevel=StorageLevel.MEMORY_ONLY,
        )
        polygons.analyze()
        polygons.spatialPartitioning(GridType.HILBERT)

        for grid_cell in polygons.grids:
            print(grid_cell)
Example #8
0
 def test_empty_constructor(self):
     """Populate an argument-less PolygonRDD from an already prepared one.

     The source RDD is analyzed, spatially partitioned and R-tree indexed,
     then assigned to ``rawSpatialRDD`` of an empty PolygonRDD, which is
     analyzed in turn. The test passes as long as nothing raises.
     """
     source_rdd = PolygonRDD(
         sparkContext=self.sc,
         InputLocation=input_location,
         splitter=splitter,
         carryInputData=True,
         partitions=num_partitions,
         newLevel=StorageLevel.MEMORY_ONLY,
     )
     source_rdd.analyze()
     source_rdd.spatialPartitioning(grid_type)
     source_rdd.buildIndex(IndexType.RTREE, True)

     empty_rdd = PolygonRDD()
     # The whole PolygonRDD object (not its raw RDD) is handed to the setter.
     empty_rdd.rawSpatialRDD = source_rdd
     empty_rdd.analyze()
Example #9
0
 def test_geojson_constructor(self):
     """Construct a PolygonRDD from the GeoJSON fixture and check its contents.

     Checks the approximate record count, that a boundary envelope exists,
     the tab-separated user data of the first two records, and the field
     names exposed by the RDD.
     """
     spatial_rdd = PolygonRDD(sparkContext=self.sc,
                              InputLocation=input_location_geo_json,
                              splitter=FileDataSplitter.GEOJSON,
                              carryInputData=True,
                              partitions=4,
                              newLevel=StorageLevel.MEMORY_ONLY)
     spatial_rdd.analyze()
     assert spatial_rdd.approximateTotalCount == 1001
     assert spatial_rdd.boundaryEnvelope is not None
     # Fetch the first two records with one take() call instead of running
     # take(1) and take(2) as two separate actions.
     first_two = spatial_rdd.rawSpatialRDD.take(2)
     assert first_two[0].getUserData(
     ) == "01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636"
     assert first_two[1].getUserData(
     ) == "01\t045\t021102\t4\t1500000US010450211024\t010450211024\t4\tBG\t11360854\t0"
     assert spatial_rdd.fieldNames == [
         "STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "AFFGEOID", "GEOID",
         "NAME", "LSAD", "ALAND", "AWATER"
     ]
Example #10
0
    def test_to_df_srdd_fn_spark(self):
        """Convert a PolygonRDD to a DataFrame with caller-supplied column names.

        The resulting DataFrame is expected to expose ``geometry`` followed
        by the supplied names, and to contain 1001 rows.
        """
        polygons = PolygonRDD(
            self.spark.sparkContext,
            geojson_input_location,
            FileDataSplitter.GEOJSON,
            True,
        )
        polygons.analyze()
        assert polygons.approximateTotalCount == 1001

        column_names = [
            "state_id", "county_id", "tract_id", "bg_id", "fips",
            "fips_short", "bg_nr", "type", "code1", "code2",
        ]
        renamed_df = Adapter.toDf(polygons, column_names, self.spark)
        renamed_df.show()

        expected_columns = ["geometry"] + column_names
        assert renamed_df.columns == expected_columns
        assert renamed_df.count() == 1001
Example #11
0
    def test_constructor(self):
        """Exercise the PolygonRDD constructor overloads one after another.

        RDDs built without the EPSG code arguments are compared against
        ``input_boundary``; RDDs built with the "epsg:4326" -> "epsg:5070"
        pair (or from an RDD that was built with it) are compared against
        ``query_envelope``. RDDs built from ``polygon_rdd_input_location``
        are expected to analyze successfully and to report an approximate
        count of 3000.
        """
        # --- file input, keyword and positional forms -----------------------
        spatial_rdd_core = PolygonRDD(sparkContext=self.sc,
                                      InputLocation=input_location,
                                      splitter=splitter,
                                      carryInputData=True,
                                      partitions=num_partitions,
                                      newLevel=StorageLevel.MEMORY_ONLY)
        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      num_partitions, StorageLevel.MEMORY_ONLY)

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        # --- construction from an existing raw JVM spatial RDD --------------
        spatial_rdd = PolygonRDD(
            rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD)
        self.compare_spatial_rdd(spatial_rdd, input_boundary)
        spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD,
                                 "epsg:4326", "epsg:5070")
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        assert spatial_rdd.getSourceEpsgCode() == "epsg:4326"
        assert spatial_rdd.getTargetEpsgCode() == "epsg:5070"
        spatial_rdd = PolygonRDD(
            rawSpatialRDD=spatial_rdd_core.rawJvmSpatialRDD,
            sourceEpsgCode="epsg:4326",
            targetEpsgCode="epsg:5070")
        assert spatial_rdd.getSourceEpsgCode() == "epsg:4326"
        assert spatial_rdd.getTargetEpsgCode() == "epsg:5070"
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        # Built from the already transformed RDD above, hence query_envelope.
        spatial_rdd = PolygonRDD(rawSpatialRDD=spatial_rdd.rawJvmSpatialRDD,
                                 newLevel=StorageLevel.MEMORY_ONLY)
        self.compare_spatial_rdd(spatial_rdd, query_envelope)
        spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD,
                                 StorageLevel.MEMORY_ONLY)
        self.compare_spatial_rdd(spatial_rdd, input_boundary)

        # The argument-less constructor only has to succeed; its result is
        # not used, so it is not bound to a name.
        PolygonRDD()

        # --- file input with start/end offsets -------------------------------
        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True, 2)
        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True)
        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        # --- file input without a storage level ------------------------------
        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      num_partitions)

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True)

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        # --- offsets plus storage level, with and without partition count ----
        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True, 5,
                                      StorageLevel.MEMORY_ONLY)

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True,
                                      StorageLevel.MEMORY_ONLY)

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        # --- file input plus storage level, with and without partition count -
        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      5, StorageLevel.MEMORY_ONLY)

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      StorageLevel.MEMORY_ONLY)

        self.compare_spatial_rdd(spatial_rdd_core, input_boundary)

        # --- EPSG code pair combined with the forms above ---------------------
        spatial_rdd = PolygonRDD(spatial_rdd_core.rawJvmSpatialRDD,
                                 StorageLevel.MEMORY_ONLY, "epsg:4326",
                                 "epsg:5070")
        self.compare_spatial_rdd(spatial_rdd, query_envelope)

        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True, 5,
                                      StorageLevel.MEMORY_ONLY, "epsg:4326",
                                      "epsg:5070")

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        query_window_rdd = PolygonRDD(self.sc, polygon_rdd_input_location,
                                      polygon_rdd_start_offset,
                                      polygon_rdd_end_offset,
                                      polygon_rdd_splitter, True,
                                      StorageLevel.MEMORY_ONLY, "epsg:4326",
                                      "epsg:5070")

        assert query_window_rdd.analyze()
        assert query_window_rdd.approximateTotalCount == 3000

        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      5, StorageLevel.MEMORY_ONLY, "epsg:4326",
                                      "epsg:5070")

        self.compare_spatial_rdd(spatial_rdd_core, query_envelope)

        spatial_rdd_core = PolygonRDD(self.sc, input_location, splitter, True,
                                      StorageLevel.MEMORY_ONLY, "epsg:4326",
                                      "epsg:5070")

        # This RDD used to be overwritten without being checked; it takes the
        # same arguments as the keyword form below, so it is held to the same
        # expectation.
        self.compare_spatial_rdd(spatial_rdd_core, query_envelope)

        # NOTE(review): the keyword here is ``sourceEpsgCRSCode`` while the
        # raw-RDD form above uses ``sourceEpsgCode`` -- confirm both match the
        # PolygonRDD signatures.
        spatial_rdd_core = PolygonRDD(sparkContext=self.sc,
                                      InputLocation=input_location,
                                      splitter=splitter,
                                      carryInputData=True,
                                      newLevel=StorageLevel.MEMORY_ONLY,
                                      sourceEpsgCRSCode="epsg:4326",
                                      targetEpsgCode="epsg:5070")

        self.compare_spatial_rdd(spatial_rdd_core, query_envelope)