"""
Core components
pyspark.SparkContext
Main entry point for Spark functionality.

pyspark.RDD
A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
"""

from pyspark import SparkConf, SparkContext

conf = SparkConf()
conf.setAppName("appName")
conf.setMaster("local")
conf.set("key", "value")  # related setters: setIfMissing(), setSparkHome()
print(conf.getAll())
print(conf.get('spark.app.name'))
print(conf.contains('spark.app.name'))
print(conf.toDebugString())
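
# Hedged sketch of the setter variants noted above: setIfMissing() only writes a value
# when the key is absent, so it does not overwrite the name chosen via setAppName().
conf.setIfMissing("spark.app.name", "fallbackName")  # ignored, key is already set
print(conf.get("spark.app.name"))                    # still "appName"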

# Enable the Python worker profiler; results can be printed later with sc.show_profiles().
conf.set('spark.python.profile', 'true')

# Main entry point for Spark functionality. A SparkContext represents the connection to a
# Spark cluster and can be used to create RDDs and broadcast variables on that cluster.
# Only one SparkContext may be active per JVM; the active SparkContext must be stop()-ped
# before a new one is created.
# class pyspark.SparkContext(master=None, appName=None, sparkHome=None, pyFiles=None, environment=None, batchSize=0, serializer=PickleSerializer(), conf=None, gateway=None, jsc=None, profiler_cls=<class 'pyspark.profiler.BasicProfiler'>)
print(
    "--------------------SparkConf ends---------SparkContext begins-------------------------------"
)
sc = SparkContext(conf=conf)
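
# Because only one SparkContext can be active per JVM, re-running this script in the same
# interpreter (e.g. a notebook) would fail when constructing a second context. A hedged
# sketch of the usual workaround: getOrCreate() reuses the already-running context
# (stop() it first if a fresh one is really needed).
same_sc = SparkContext.getOrCreate(conf)
print(same_sc is sc)  # True: the active context is reused, not recreated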

sc.setLogLevel("WARN")  # suppress INFO-level log noise; levels include ALL, DEBUG, INFO, WARN, ERROR
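
# Minimal usage sketch tying the pieces together: the RDD abstraction mentioned in the
# docstring above plus the Python profiler enabled via spark.python.profile. The numbers
# are arbitrary illustration data, not from the original example.
rdd = sc.parallelize(range(100))     # distribute a local collection as an RDD
squares = rdd.map(lambda x: x * x)   # lazy transformation
print(squares.take(5))               # action: [0, 1, 4, 9, 16]
print(squares.sum())                 # action: 328350
sc.show_profiles()                   # dump the collected Python profiling stats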