def spark_context():
    conf = pyspark.SparkConf()
    conf.set(key="spark.jars.packages", value=get_mleap_jars())
    # Exclude `net.sourceforge.f2j` to avoid `java.io.FileNotFoundException`
    conf.set(key="spark.jars.excludes", value="net.sourceforge.f2j:arpack_combined_all")
    spark_session = get_spark_session(conf)
    return spark_session.sparkContext
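These snippets call helpers defined elsewhere in the module (and the later ones presumably also rely on import os, import pyspark, and from packaging.version import Version). A minimal sketch, assuming a local Spark master and MLeap 0.12.0 artifacts, of what get_spark_session, get_mleap_jars, and the module-level _logger might look like:

import logging

from pyspark.sql import SparkSession

_logger = logging.getLogger(__name__)


def get_spark_session(conf):
    # Build (or reuse) a local SparkSession from the given SparkConf.
    return SparkSession.builder.config(conf=conf).master("local[*]").getOrCreate()


def get_mleap_jars():
    # Comma-separated Maven coordinates for the MLeap Spark packages
    # (the Scala/MLeap versions here are assumptions for illustration).
    mleap_version = "0.12.0"
    return ",".join(
        "ml.combust.mleap:%s_2.11:%s" % (artifact, mleap_version)
        for artifact in ("mleap-spark-base", "mleap-spark")
    )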
Example #2
def spark_context():
    if Version(pyspark.__version__) < Version("3.1"):
        # A workaround for this issue:
        # https://stackoverflow.com/questions/62109276/errorjava-lang-unsupportedoperationexception-for-pyspark-pandas-udf-documenta
        spark_home = os.environ.get("SPARK_HOME", os.path.dirname(pyspark.__file__))
        conf_dir = os.path.join(spark_home, "conf")
        os.makedirs(conf_dir, exist_ok=True)
        with open(os.path.join(conf_dir, "spark-defaults.conf"), "w") as f:
            conf = """
spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
"""
            f.write(conf)
    conf = pyspark.SparkConf()
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )
def spark_context():
    conf = pyspark.SparkConf()
    conf.set(
        key="spark.jars.packages",
        value=("ml.combust.mleap:mleap-spark-base_2.11:0.12.0,"
               "ml.combust.mleap:mleap-spark_2.11:0.12.0"),
    )
    spark_session = get_spark_session(conf)
    return spark_session.sparkContext
def _get_spark_session_with_retry(max_tries=3):
    conf = pyspark.SparkConf()
    for num_tries in range(max_tries):
        try:
            return get_spark_session(conf)
        except Exception:
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )
Example #5
def spark_context():
    conf = pyspark.SparkConf()
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )
def spark_context():
    conf = pyspark.SparkConf()
    conf.set(key="spark.jars.packages",
             value='ml.combust.mleap:mleap-spark-base_2.11:0.12.0,'
                   'ml.combust.mleap:mleap-spark_2.11:0.12.0')
    max_tries = 3
    for num_tries in range(max_tries):
        try:
            spark = get_spark_session(conf)
            return spark.sparkContext
        except Exception:
            if num_tries >= max_tries - 1:
                raise
            _logger.exception(
                "Attempt %s to create a SparkSession failed, retrying...", num_tries
            )
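For reference, a hedged usage sketch of one of these helpers; the smoke-test job below is an illustrative assumption, not part of the original module:

sc = spark_context()
# Run a trivial job to confirm the returned SparkContext is usable.
assert sc.parallelize(range(10)).sum() == 45
sc.stop()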