コード例 #1
0
def start_spark(app_name='my_spark_app',
                master='local[*]',
                jar_packages=None,
                spark_config=None,
                ssc_config=None,
                kafka_config=None,
                callback=None):
    """Start a Spark streaming job fed by a Kafka direct stream.

    Builds a SparkConf/SparkContext/SparkSession, wires a
    StreamingContext to a Kafka direct stream whose messages are
    JSON-decoded, optionally hands the stream to ``callback``, then
    blocks in ``awaitTermination()``.

    Args:
        app_name: Spark application name.
        master: Spark master URL.
        jar_packages: optional list of Maven coordinates to ship via
            ``PYSPARK_SUBMIT_ARGS`` (previously accepted but ignored).
        spark_config: optional dict of extra Spark conf key/values,
            applied with ``setIfMissing``.
        ssc_config: optional dict; ``batchDuration`` (seconds) defaults
            to 5 when omitted.
        kafka_config: dict with ``topics`` (list) and ``config``
            (kafkaParams dict) — required for the stream to start.
        callback: optional callable invoked as
            ``callback(kafka_stream=..., sc=..., spark_conf=...)``
            before the streaming context starts.

    Note:
        This call blocks until the streaming context terminates.
    """
    import os  # local import: only needed for the submit-args env var

    # Never use mutable default arguments — normalize None here instead.
    jar_packages = jar_packages or []
    spark_config = spark_config or {}
    ssc_config = ssc_config or {'batchDuration': 5}
    kafka_config = kafka_config or {}

    spark_conf = SparkConf().setAppName(app_name).setMaster(master)
    for key, value in spark_config.items():
        spark_conf.setIfMissing(key, value)

    # Must be set BEFORE SparkContext is created, and must end with
    # 'pyspark-shell' for the args to take effect (see example #3).
    if jar_packages:
        os.environ['PYSPARK_SUBMIT_ARGS'] = (
            '--packages ' + ','.join(jar_packages) + ' pyspark-shell')

    sc = SparkContext(conf=spark_conf)
    sc.setLogLevel("WARN")
    spark = SparkSession(sparkContext=sc)

    ssc = StreamingContext(sc, ssc_config['batchDuration'])
    # msg is a (key, value) tuple; the payload is the JSON-encoded value.
    kafka_stream = KafkaUtils.createDirectStream(
        ssc, topics=kafka_config['topics'],
        kafkaParams=kafka_config['config']).map(lambda msg: json.loads(msg[1]))

    if callback:
        callback(kafka_stream=kafka_stream, sc=sc, spark_conf=spark_conf)

    ssc.start()
    ssc.awaitTermination()
コード例 #2
0
    def start_spark(app_name='my_spark_app', master='local[*]', jar_packages=None,
                    spark_config=None, ssc_config=None, callback=None):
        """Start a Spark streaming job and block until termination.

        Creates a SparkConf/SparkContext/SparkSession plus a
        StreamingContext, optionally passing both the session and the
        streaming context to ``callback`` before starting the stream.

        Args:
            app_name: Spark application name.
            master: Spark master URL.
            jar_packages: optional list of Maven coordinates exported
                via ``PYSPARK_SUBMIT_ARGS``.
            spark_config: optional dict of extra Spark conf key/values,
                applied with ``setIfMissing``.
            ssc_config: optional dict; ``batchDuration`` (seconds)
                defaults to 5.
            callback: optional callable invoked as
                ``callback(spark=..., ssc=...)``.

        Note:
            Blocks in ``awaitTermination()`` until the stream stops.
        """
        import os  # local import: only needed for the submit-args env var

        # Never use shared mutable defaults ([], {}) — normalize None.
        jar_packages = jar_packages or []
        spark_config = spark_config or {}
        ssc_config = ssc_config or {'batchDuration': 5}

        spark_conf = SparkConf().setAppName(app_name).setMaster(master)
        for key, value in spark_config.items():
            spark_conf.setIfMissing(key, value)

        # Must end with 'pyspark-shell' for spark-submit to honor it,
        # and must be set before the SparkContext is created.
        if jar_packages:
            os.environ["PYSPARK_SUBMIT_ARGS"] = (
                '--packages ' + ','.join(jar_packages) + ' pyspark-shell')

        sc = SparkContext(conf=spark_conf)
        spark = SparkSession(sparkContext=sc)

        ssc = StreamingContext(sc, ssc_config['batchDuration'])

        if callback:
            callback(spark=spark, ssc=ssc)

        ssc.start()
        ssc.awaitTermination()
コード例 #3
0
ファイル: utils.py プロジェクト: antarahealth/openmrs-elt
    def getSparkConf():
        """Build a SparkConf from the 'spark' section of the pipeline config.

        Side effect: sets ``PYSPARK_SUBMIT_ARGS`` so the configured
        packages are downloaded when the context starts.
        """
        cfg = PipelineUtils.getConfig()['spark']

        # '--packages <a,b,...> pyspark-shell' — the trailing token is
        # required for PYSPARK_SUBMIT_ARGS to be honored.
        os.environ['PYSPARK_SUBMIT_ARGS'] = (
            '--packages ' + ','.join(cfg['packages']) + ' pyspark-shell')

        conf = SparkConf().setAppName(cfg['appName']).setMaster(cfg['master'])
        if cfg['conf']:
            for key, value in cfg['conf'].items():
                conf.setIfMissing(key, value)
        return conf
コード例 #4
0
 def configure(app_name="Sparkling Water Demo"):
     """Return a SparkConf named *app_name*.

     The master falls back to the ``spark.master`` environment
     variable, defaulting to ``local[*]``, and is applied with
     ``setIfMissing`` so an explicitly configured master wins.
     """
     master = os.getenv("spark.master", "local[*]")
     conf = SparkConf()
     conf.setAppName(app_name)
     conf.setIfMissing("spark.master", master)
     return conf
コード例 #5
0
 def configure(app_name="Sparkling Water Demo"):
     """Build a SparkConf for the demo app.

     Uses the ``spark.master`` environment variable when present
     (default ``local[*]``); ``setIfMissing`` keeps any master already
     set on the conf.
     """
     conf = SparkConf()
     conf.setAppName(app_name)
     default_master = os.getenv("spark.master", "local[*]")
     conf.setIfMissing("spark.master", default_master)
     return conf