def compare_spatial_rdd(self, spatial_rdd: SpatialRDD, envelope: Envelope) -> bool:
    spatial_rdd.analyze()
    # input_count is the module-level expected record count for the test data set
    assert input_count == spatial_rdd.approximateTotalCount
    assert envelope == spatial_rdd.boundaryEnvelope
    return True
def partition_rdds(self, query_rdd: SpatialRDD, spatial_rdd: SpatialRDD, grid_type, use_legacy_apis):
    spatial_rdd.spatialPartitioning(grid_type)
    if use_legacy_apis:
        # Legacy API: pass the partitioning structure itself; quadtree
        # partitioning exposes a partition tree instead of a grid list
        if grid_type != GridType.QUADTREE:
            query_rdd.spatialPartitioning(spatial_rdd.grids)
        else:
            query_rdd.spatialPartitioning(spatial_rdd.partitionTree)
    else:
        # Current API: reuse the partitioner object directly
        query_rdd.spatialPartitioning(spatial_rdd.getPartitioner())
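# Usage sketch (a minimal illustration, not part of the original test suite;
# assumes two already-analyzed RDDs, and KDBTREE is one of the GridType values):
#
#     spatial_rdd.analyze()
#     query_rdd.analyze()
#     self.partition_rdds(query_rdd, spatial_rdd, GridType.KDBTREE, use_legacy_apis=False)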
def test_to_rdd_from_dataframe(self):
    spatial_df = self._create_spatial_point_table()
    spatial_df.show()

    jsrdd = Adapter.toRdd(spatial_df)
    spatial_rdd = SpatialRDD(self.sc)
    spatial_rdd.rawJvmSpatialRDD = jsrdd
    spatial_rdd.analyze()

    assert spatial_rdd.approximateTotalCount == 121960
    assert spatial_rdd.boundaryEnvelope == Envelope(
        -179.147236, 179.475569, -14.548699, 71.35513400000001)
def toSpatialRdd(cls, dataFrame: DataFrame):
    """
    :param dataFrame: DataFrame containing a geometry column
    :return: SpatialRDD
    """
    sc = dataFrame._sc
    jvm = sc._jvm
    srdd = jvm.Adapter.toSpatialRdd(dataFrame._jdf)
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str) -> SpatialRDD:
    """
    :param sc: SparkContext
    :param inputPath: str, file input location
    :return: SpatialRDD
    """
    GeoJsonReader.validate_imports()
    jvm = sc._jvm
    srdd = jvm.GeoJsonReader.readToGeometryRDD(
        sc._jsc,
        inputPath
    )
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
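# Usage sketch (hypothetical path; assumes an active SparkContext `sc`):
#
#     geojson_rdd = GeoJsonReader.readToGeometryRDD(sc, "data/testpolygon.json")
#     geojson_rdd.analyze()
#     print(geojson_rdd.approximateTotalCount)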
def toSpatialRdd(cls, dataFrame: DataFrame, geometryFieldName: str) -> SpatialRDD:
    """
    :param dataFrame: DataFrame containing a geometry column
    :param geometryFieldName: str, name of the geometry column
    :return: SpatialRDD
    """
    sc = dataFrame._sc
    jvm = sc._jvm
    srdd = jvm.Adapter.toSpatialRdd(dataFrame._jdf, geometryFieldName)
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
def readToGeometryRDD(cls, rawTextRDD: RDD) -> SpatialRDD:
    """
    :param rawTextRDD: RDD
    :return: SpatialRDD
    """
    GeoJsonReader.validate_imports()
    sc = rawTextRDD.ctx
    jvm = sc._jvm
    srdd = jvm.GeoJsonReader.readToGeometryRDD(
        rawTextRDD._jrdd
    )
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str) -> SpatialRDD:
    """
    :param sc: SparkContext
    :param inputPath: str, path to the directory holding the shapefile
    :return: SpatialRDD
    """
    ShapefileReader.validate_imports()
    jvm = sc._jvm
    jsc = sc._jsc
    srdd = jvm.ShapefileReader.readToGeometryRDD(
        jsc,
        inputPath
    )
    spatial_rdd = SpatialRDD(sc=sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
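# Usage sketch (hypothetical path; a shapefile is read from the directory that
# holds the .shp/.shx/.dbf files, not from a single file):
#
#     shape_rdd = ShapefileReader.readToGeometryRDD(sc, "data/shapefiles/polygon")
#     shape_rdd.analyze()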
def readToGeometryRDD(cls, rawTextRDD: RDD, allowInvalidGeometries: bool,
                      skipSyntacticallyInvalidGeometries: bool) -> SpatialRDD:
    """
    :param rawTextRDD: RDD
    :param allowInvalidGeometries: bool
    :param skipSyntacticallyInvalidGeometries: bool
    :return: SpatialRDD
    """
    GeoJsonReader.validate_imports()
    sc = rawTextRDD.ctx
    jvm = sc._jvm
    srdd = jvm.GeoJsonReader.readToGeometryRDD(
        rawTextRDD._jrdd,
        allowInvalidGeometries,
        skipSyntacticallyInvalidGeometries
    )
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
def readToGeometryRDD(cls, sc: SparkContext, inputPath: str, allowInvalidGeometries: bool,
                      skipSyntacticallyInvalidGeometries: bool) -> SpatialRDD:
    """
    :param sc: SparkContext
    :param inputPath: str, path to the file
    :param allowInvalidGeometries: bool
    :param skipSyntacticallyInvalidGeometries: bool
    :return: SpatialRDD
    """
    GeoJsonReader.validate_imports()
    jvm = sc._jvm
    srdd = jvm.GeoJsonReader.readToGeometryRDD(
        sc._jsc,
        inputPath,
        allowInvalidGeometries,
        skipSyntacticallyInvalidGeometries
    )
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
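# Usage sketch (hypothetical path; the two flags control how malformed GeoJSON
# records are handled during parsing):
#
#     rdd = GeoJsonReader.readToGeometryRDD(
#         sc, "data/testpolygon-with-invalid-geometries.json",
#         allowInvalidGeometries=False,
#         skipSyntacticallyInvalidGeometries=True
#     )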
def toSpatialRdd(cls, dataFrame: DataFrame, fieldNames: List) -> SpatialRDD:
    """
    :param dataFrame: DataFrame containing a geometry column
    :param fieldNames: List, non-geometry columns to carry over into the SpatialRDD
    :return: SpatialRDD
    """
    sc = dataFrame._sc
    jvm = sc._jvm
    srdd = jvm.PythonAdapterWrapper.toSpatialRdd(dataFrame._jdf, fieldNames)
    spatial_rdd = SpatialRDD(sc)
    spatial_rdd.set_srdd(srdd)
    return spatial_rdd
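# Usage sketch (column names are hypothetical; the listed fields are carried
# over from the DataFrame into the SpatialRDD's user data):
#
#     spatial_rdd = Adapter.toSpatialRdd(spatial_df, ["county_name", "population"])
#     spatial_rdd.analyze()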
def test_read_csv_point_into_spatial_rdd_by_passing_coordinates(self):
    df = self.spark.read.format("csv").\
        option("delimiter", ",").\
        option("header", "false").\
        load(area_lm_point_input_location)
    df.show()
    df.createOrReplaceTempView("inputtable")

    spatial_df = self.spark.sql(
        "select ST_Point(cast(inputtable._c0 as Decimal(24,20)), "
        "cast(inputtable._c1 as Decimal(24,20))) as arealandmark "
        "from inputtable"
    )
    spatial_df.show()
    spatial_df.printSchema()

    spatial_rdd = SpatialRDD(self.spark.sparkContext)
    spatial_rdd.rawJvmSpatialRDD = Adapter.toRdd(spatial_df)
    spatial_rdd.analyze()

    assert len(Adapter.toDf(spatial_rdd, self.spark).columns) == 1
    Adapter.toDf(spatial_rdd, self.spark).show()
def compare_count(self, spatial_rdd: SpatialRDD, cnt: int, envelope: Envelope):
    spatial_rdd.analyze()
    assert cnt == spatial_rdd.approximateTotalCount
    assert envelope == spatial_rdd.boundaryEnvelope
def prepare_rdd(self, object_rdd: SpatialRDD, window_rdd: SpatialRDD, grid_type: GridType):
    object_rdd.analyze()
    window_rdd.analyze()
    # repartition returns a new RDD rather than mutating in place;
    # reassign so the repartitioning actually takes effect
    object_rdd.rawSpatialRDD = object_rdd.rawSpatialRDD.repartition(4)
    object_rdd.spatialPartitioning(grid_type)
    object_rdd.buildIndex(IndexType.RTREE, True)
    window_rdd.spatialPartitioning(object_rdd.getPartitioner())
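# Usage sketch (a minimal illustration of running a join after prepare_rdd;
# assumes JoinQuery is imported from the core spatial-operator module):
#
#     self.prepare_rdd(object_rdd, window_rdd, GridType.QUADTREE)
#     result = JoinQuery.SpatialJoinQuery(object_rdd, window_rdd, True, False).collect()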