def spark(self):
    # Lazily create and cache a single GeoSpark-enabled SparkSession on the
    # instance. The attribute is stored via setattr/getattr with a string
    # name, which sidesteps double-underscore name mangling.
    if not hasattr(self, "__spark"):
        upload_jars()
        spark = SparkSession. \
            builder. \
            config("spark.serializer", KryoSerializer.getName). \
            config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName). \
            master("local[*]"). \
            getOrCreate()
        GeoSparkRegistrator.registerAll(spark)
        setattr(self, "__spark", spark)
    return getattr(self, "__spark")
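The same lazy, cached session setup can be written more compactly with functools.cached_property. The sketch below is an illustrative alternative rather than the original code; it assumes KryoSerializer and GeoSparkKryoRegistrator are importable from geo_pyspark.utils (the register imports are the same ones used by the test module further down).

# Alternative sketch using functools.cached_property (Python 3.8+).
# Assumes geo_pyspark exposes the Kryo helpers under geo_pyspark.utils.
from functools import cached_property

from pyspark.sql import SparkSession

from geo_pyspark.register import GeoSparkRegistrator, upload_jars
from geo_pyspark.utils import GeoSparkKryoRegistrator, KryoSerializer


class SparkTestBase:

    @cached_property
    def spark(self) -> SparkSession:
        # Make the bundled GeoSpark jars available to Spark before the
        # session (and its JVM) is created.
        upload_jars()
        session = SparkSession. \
            builder. \
            config("spark.serializer", KryoSerializer.getName). \
            config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName). \
            master("local[*]"). \
            getOrCreate()
        # Register the GeoSpark SQL functions (ST_*) on the session.
        GeoSparkRegistrator.registerAll(session)
        return session

cached_property stores the session on the instance after the first access, matching the hasattr/setattr pattern above while reading as plain attribute access (self.spark).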
import os

import pytest
from pyspark.sql import SparkSession

from geo_pyspark.core.formatMapper.geo_json_reader import GeoJsonReader
from geo_pyspark.register import GeoSparkRegistrator, upload_jars
from tests.tools import tests_path

upload_jars()

spark = SparkSession.\
    builder.\
    master("local").\
    getOrCreate()

GeoSparkRegistrator.\
    registerAll(spark)

sc = spark.sparkContext

geo_json_contains_id = os.path.join(tests_path, "resources/testContainsId.json")
geo_json_geom_with_feature_property = os.path.join(
    tests_path, "resources/testPolygon.json")
geo_json_geom_without_feature_property = os.path.join(
    tests_path, "resources/testpolygon-no-property.json")
geo_json_with_invalid_geometries = os.path.join(
    tests_path, "resources/testInvalidPolygon.json")
geo_json_with_invalid_geom_with_feature_property = os.path.join(
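The module above is truncated, so here is a sketch of how one of these fixture paths might be exercised with GeoJsonReader. The readToGeometryRDD entry point and the rawSpatialRDD attribute are assumed to match the GeoSpark/Sedona reader API; the test name and assertion are placeholders, not the project's actual tests.

# Hypothetical test using the module-level paths above; the reader API is
# assumed to be GeoJsonReader.readToGeometryRDD(sc, path), and the assertion
# threshold is a placeholder.
def test_read_geojson_with_feature_property():
    geo_json_rdd = GeoJsonReader.readToGeometryRDD(
        sc, geo_json_geom_with_feature_property)
    # The file should parse into at least one geometry.
    assert geo_json_rdd.rawSpatialRDD.count() > 0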
db_properties["user"] = "******" db_properties["password"] = "******" db_properties["driver"] = "org.postgresql.Driver" db_properties["numPartitions"] = num_partitions # db_properties["partitionColumn"] = "geometric_boundary_id" db_properties["queryTimeout"] = "10000" db_properties["batchsize"] = "99999999" db_properties["isolationLevel"] = "READ_COMMITTED" df = spark.sql( "select b.id as geo_id, b.type as geo_type, count(distinct t.id) as trails,SUM(case when t.geom is not null then ST_Length(ST_GeomFromWKT(ST_AsText(ST_Intersection(b.geom, t.geom)))) else 0.0 end) * 0.000621371 as trail_length_mi FROM geographic_boundary b left JOIN trails t ON b.geom is not null and t.geom is not null and b.disable = False and ST_Intersects(" "b.geom, t.geom) group by b.id, b.type") # df.show(20) df.write.jdbc(url=url, table="tmp.trails", properties=db_properties, mode="overwrite") if __name__ == '__main__': conf = SparkConf().setAll(pairs).setMaster("local[*]") spark = SparkSession.builder.config(conf=conf).getOrCreate() flag = upload_jars() print(flag) loaded = GeoSparkRegistrator.registerAll(spark) print( "########################################################################################################################################################################", flush=True) print("GEOSPARK LOADED : ", loaded, flush=True) process(spark)