Example #1
def try_get_airflow_context_from_spark_conf():
    # type: () -> Optional[AirflowTaskContext]
    if not spark_tracking_enabled() or _SPARK_ENV_FLAG not in os.environ:
        _debug_init_print(
            "DBND__ENABLE__SPARK_CONTEXT_ENV or SPARK_ENV_LOADED are not set")
        return None

    if not _is_dbnd_spark_installed():
        _debug_init_print("failed to import pyspark or dbnd-spark")
        return None
    try:
        _debug_init_print("creating spark context to get spark conf")
        from pyspark import SparkContext

        conf = SparkContext.getOrCreate().getConf()

        dag_id = conf.get("spark.env.AIRFLOW_CTX_DAG_ID")
        execution_date = conf.get("spark.env.AIRFLOW_CTX_EXECUTION_DATE")
        task_id = conf.get("spark.env.AIRFLOW_CTX_TASK_ID")
        try_number = conf.get("spark.env.AIRFLOW_CTX_TRY_NUMBER")

        if dag_id and task_id and execution_date:
            return AirflowTaskContext(
                dag_id=dag_id,
                execution_date=execution_date,
                task_id=task_id,
                try_number=try_number,
            )
    except Exception as ex:
        logger.info("Failed to get airflow context info from spark job: %s",
                    ex)

    return None
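The spark.env.* keys read above only exist if the submitting side put them on the Spark conf. A minimal usage sketch, not from the original source: the Airflow values are placeholders, the env-var names are taken from the debug message above, and the exact truthy values they need are an assumption.

import os

from pyspark import SparkConf, SparkContext

# Guards checked by the helper, per the debug message above (values assumed):
os.environ["DBND__ENABLE__SPARK_CONTEXT_ENV"] = "True"
os.environ["SPARK_ENV_LOADED"] = "1"

# Placeholder Airflow context injected into the Spark conf:
conf = (
    SparkConf()
    .setMaster("local[1]")
    .set("spark.env.AIRFLOW_CTX_DAG_ID", "my_dag")
    .set("spark.env.AIRFLOW_CTX_EXECUTION_DATE", "2021-01-01T00:00:00+00:00")
    .set("spark.env.AIRFLOW_CTX_TASK_ID", "my_task")
    .set("spark.env.AIRFLOW_CTX_TRY_NUMBER", "1")
)
SparkContext.getOrCreate(conf)

context = try_get_airflow_context_from_spark_conf()
if context is not None:
    print(context.dag_id, context.task_id)  # -> my_dag my_task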
Example #2
def verify_spark_pre_conditions():
    if spark_tracking_enabled() and _SPARK_ENV_FLAG in os.environ:
        if _is_dbnd_spark_installed():
            return True
        else:
            _debug_init_print("failed to import pyspark or dbnd-spark")
    else:
        _debug_init_print(
            "DBND__ENABLE__SPARK_CONTEXT_ENV or SPARK_ENV_LOADED are not set")
    return False
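A hypothetical call site, not from the original source, showing the predicate used as an early-exit guard before any Spark-specific work; the wrapper name is a placeholder.

def collect_spark_conf_for_tracking():
    # Hypothetical wrapper: skip Spark work unless tracking is enabled,
    # SPARK_ENV_LOADED is present, and pyspark/dbnd-spark import cleanly.
    if not verify_spark_pre_conditions():
        return None

    from pyspark import SparkContext

    # SparkConf.getAll() returns (key, value) pairs
    return dict(SparkContext.getOrCreate().getConf().getAll())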
Example #3
def dbnd_setup_plugin():
    from dbnd_spark.local.local_spark_config import SparkLocalEngineConfig
    from dbnd_spark.spark_bootstrap import dbnd_spark_bootstrap

    register_config_cls(SparkLocalEngineConfig)
    register_config_cls(LivySparkConfig)

    dbnd_spark_bootstrap()

    if has_pyspark_imported() and spark_tracking_enabled():
        config_store = read_spark_environ_config()
        dbnd_config.set_values(config_store, "system")
    else:
        _debug_init_print(
            "spark conf is not loaded since pyspark is not imported or DBND__ENABLE__SPARK_CONTEXT_ENV is not set"
        )
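For orientation, dbnd_setup_plugin is a hook that dbnd calls at startup to let a plugin register its config classes. A sketch of how such a module is typically wired; the hookimpl marker and the import are assumptions about dbnd's pluggy-based plugin system, not shown in the excerpt above.

# Sketch of the surrounding plugin module (structure assumed, not verbatim):
import dbnd
from dbnd import register_config_cls


@dbnd.hookimpl  # pluggy-style hook marker assumed to be how dbnd finds plugins
def dbnd_setup_plugin():
    ...  # registration body as in Example #3 above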
Example #4
def get_from_env_or_spark_env(key):
    value = os.environ.get(key)
    if value:
        return value

    # spark guards
    if not spark_tracking_enabled() or _SPARK_ENV_FLAG not in os.environ:
        return None

    if not _is_dbnd_spark_installed():
        return None

    try:
        from pyspark import SparkContext

        conf = SparkContext.getOrCreate().getConf()
        value = conf.get("spark.env." + key)
        if value:
            return value
    except Exception:
        # best effort only: never raise out of a tracking lookup
        return None

    return None
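A hypothetical lookup using one of the Airflow context variables seen in Example #1: os.environ wins, and the spark.env.* namespace on the active SparkContext is the fallback.

# Hypothetical usage: resolved from os.environ if present there, otherwise
# from spark.env.AIRFLOW_CTX_DAG_ID on the active SparkContext.
dag_id = get_from_env_or_spark_env("AIRFLOW_CTX_DAG_ID")
if dag_id is None:
    print("no airflow context available in env or spark conf")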