# Example no. 1
    def _load_feature_rdd(self, file_rdd_factory: JavaObject,
                          projected_polygons, from_date: str, to_date: str,
                          zoom: int,
                          tile_size: int) -> Tuple[pyspark.RDD, JavaObject]:
        """Load a spatial-feature JSON RDD from the JVM side into Python.

        Calls ``loadSpatialFeatureJsonRDD`` on the JVM factory, then unwraps
        the returned JavaRDD of JSON strings into a Python RDD of dicts.

        :param file_rdd_factory: JVM object exposing ``loadSpatialFeatureJsonRDD``.
        :param projected_polygons: JVM polygons object limiting the spatial extent.
        :param from_date: start of the temporal range (presumably an ISO date
            string — confirm with caller).
        :param to_date: end of the temporal range (same format as ``from_date``).
        :param zoom: pyramid zoom level to load.
        :param tile_size: tile size passed through to the JVM loader.
        :return: tuple of (RDD of decoded JSON dicts, JVM layer metadata object).
        """
        # Lazy %-style args: the message is only rendered when INFO is enabled.
        logger.info("Loading feature JSON RDD from %s", file_rdd_factory)
        json_rdd = file_rdd_factory.loadSpatialFeatureJsonRDD(
            projected_polygons, from_date, to_date, zoom, tile_size)
        # The JVM call returns a Tuple2: _1() is the JavaRDD of JSON blobs,
        # _2() is layer metadata, kept as an opaque JVM object.
        jrdd = json_rdd._1()
        layer_metadata_sc = json_rdd._2()

        # Decode/unwrap the JavaRDD of JSON blobs we built in Scala,
        # additionally pickle-serialized by the PySpark adaption layer.
        j2p_rdd = self.jvm.SerDe.javaToPython(jrdd)
        serializer = pyspark.serializers.PickleSerializer()
        pyrdd = geopyspark.create_python_rdd(j2p_rdd, serializer=serializer)
        # Each RDD element is a JSON string; parse to Python objects lazily.
        pyrdd = pyrdd.map(json.loads)
        return pyrdd, layer_metadata_sc