Example #1
    # Module-level imports this method relies on (paths as in geo_pyspark):
    #   from pyspark.sql import SparkSession
    #   from geo_pyspark.register import GeoSparkRegistrator, upload_jars
    #   from geo_pyspark.utils import GeoSparkKryoRegistrator, KryoSerializer
    def spark(self):
        # Build the GeoSpark-enabled SparkSession once, then cache it on
        # the instance so later calls reuse the same session.
        if not hasattr(self, "__spark"):
            upload_jars()

            spark = (
                SparkSession.builder
                .config("spark.serializer", KryoSerializer.getName)
                .config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName)
                .master("local[*]")
                .getOrCreate()
            )

            # Register GeoSpark's SQL types and functions on the session.
            GeoSparkRegistrator.registerAll(spark)

            setattr(self, "__spark", spark)
        return getattr(self, "__spark")
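
The hand-rolled hasattr/setattr caching above can also be expressed with the standard library. A minimal sketch using functools.cached_property (Python 3.8+), with a hypothetical SparkHolder class standing in for whatever class the method above lives on:

from functools import cached_property

from pyspark.sql import SparkSession
from geo_pyspark.register import GeoSparkRegistrator, upload_jars


class SparkHolder:  # hypothetical host class
    @cached_property
    def spark(self):
        # Same build-once semantics as the manual caching above.
        upload_jars()
        session = SparkSession.builder.master("local[*]").getOrCreate()
        GeoSparkRegistrator.registerAll(session)
        return session

cached_property computes the value on first attribute access and stores it on the instance, which is exactly what the hasattr/setattr pair implements by hand.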
Example #2
import os

import pytest
from pyspark.sql import SparkSession

from geo_pyspark.core.formatMapper.geo_json_reader import GeoJsonReader
from geo_pyspark.register import GeoSparkRegistrator, upload_jars
from tests.tools import tests_path

upload_jars()


spark = (
    SparkSession.builder
    .master("local")
    .getOrCreate()
)

GeoSparkRegistrator.registerAll(spark)

sc = spark.sparkContext

geo_json_contains_id = os.path.join(tests_path,
                                    "resources/testContainsId.json")
geo_json_geom_with_feature_property = os.path.join(
    tests_path, "resources/testPolygon.json")
geo_json_geom_without_feature_property = os.path.join(
    tests_path, "resources/testpolygon-no-property.json")
geo_json_with_invalid_geometries = os.path.join(
    tests_path, "resources/testInvalidPolygon.json")
geo_json_with_invalid_geom_with_feature_property = os.path.join(
    tests_path, "resources/…")  # filename truncated in the original snippet
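
The fixture paths above feed geo_pyspark's GeoJsonReader in the tests that follow (not shown here). A minimal sketch of loading one of them, assuming readToGeometryRDD as the reader entry point:

# Sketch: load a GeoJSON fixture into a SpatialRDD and count its records.
geo_json_rdd = GeoJsonReader.readToGeometryRDD(sc, geo_json_contains_id)
print(geo_json_rdd.rawSpatialRDD.count())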
    db_properties["user"] = "******"
    db_properties["password"] = "******"
    db_properties["driver"] = "org.postgresql.Driver"
    db_properties["numPartitions"] = num_partitions
    # db_properties["partitionColumn"] = "geometric_boundary_id"
    db_properties["queryTimeout"] = "10000"
    db_properties["batchsize"] = "99999999"
    db_properties["isolationLevel"] = "READ_COMMITTED"

    df = spark.sql("""
        SELECT b.id AS geo_id,
               b.type AS geo_type,
               COUNT(DISTINCT t.id) AS trails,
               SUM(CASE WHEN t.geom IS NOT NULL
                        THEN ST_Length(ST_GeomFromWKT(ST_AsText(ST_Intersection(b.geom, t.geom))))
                        ELSE 0.0 END) * 0.000621371 AS trail_length_mi  -- meters to miles
        FROM geographic_boundary b
        LEFT JOIN trails t
          ON b.geom IS NOT NULL
         AND t.geom IS NOT NULL
         AND b.disable = false
         AND ST_Intersects(b.geom, t.geom)
        GROUP BY b.id, b.type
    """)
    # df.show(20)
    df.write.jdbc(url=url,
                  table="tmp.trails",
                  properties=db_properties,
                  mode="overwrite")


if __name__ == '__main__':
    conf = SparkConf().setAll(pairs).setMaster("local[*]")
    spark = SparkSession.builder.config(conf=conf).getOrCreate()
    flag = upload_jars()
    print(flag)
    loaded = GeoSparkRegistrator.registerAll(spark)
    print("#" * 160, flush=True)  # visual separator in the job logs
    print("GEOSPARK LOADED : ", loaded, flush=True)
    process(spark)
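
pairs is referenced but never defined in the truncated snippet. A plausible definition, mirroring the Kryo settings from Example #1 (the exact list is an assumption):

# Assumed config pairs; the two keys below are the ones Example #1 sets.
from geo_pyspark.utils import GeoSparkKryoRegistrator, KryoSerializer

pairs = [
    ("spark.serializer", KryoSerializer.getName),
    ("spark.kryo.registrator", GeoSparkKryoRegistrator.getName),
]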