def track_emr_add_steps_operator(operator, tracking_info):
    # Inject the tracking info into every spark-submit step submitted by EmrAddStepsOperator.
    flat_spark_envs = flat_conf(add_spark_env_fields(tracking_info))
    for step in operator.steps:
        args = step["HadoopJarStep"]["Args"]
        if args and "spark-submit" in args[0]:
            step["HadoopJarStep"]["Args"] = spark_submit_with_dbnd_tracking(
                args, dbnd_context=flat_spark_envs
            )
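
# Illustration only (not executed): track_emr_add_steps_operator works on steps in the
# shape used by the EMR AddJobFlowSteps API; the values below are hypothetical. Only
# steps whose first argument mentions "spark-submit" get their Args rewritten by
# spark_submit_with_dbnd_tracking, e.g.:
#
#     {
#         "Name": "run-spark-job",
#         "ActionOnFailure": "CONTINUE",
#         "HadoopJarStep": {
#             "Jar": "command-runner.jar",
#             "Args": ["spark-submit", "--deploy-mode", "cluster", "s3://bucket/job.py"],
#         },
#     }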


def track_spark_submit_operator(operator, tracking_info):
    # Pass the tracking info through the Spark configuration.
    if operator._conf is None:
        operator._conf = dict()
    spark_envs = add_spark_env_fields(tracking_info)
    operator._conf.update(spark_envs)

    # Expose the dbnd environment variables to the spark-submit process.
    if operator._env_vars is None:
        operator._env_vars = dict()
    dbnd_env_vars = dbnd_wrap_spark_environment()
    operator._env_vars.update(dbnd_env_vars)

    # Java/Scala applications additionally need the dbnd java agent configuration.
    if _has_java_application(operator):
        agent_conf = get_spark_submit_java_agent_conf()
        if agent_conf is not None:
            operator._conf.update(agent_conf)


def track_data_proc_pyspark_operator(operator, tracking_info):
    # Pass the tracking info through the Dataproc job properties.
    if operator.dataproc_properties is None:
        operator.dataproc_properties = dict()
    spark_envs = add_spark_env_fields(tracking_info)
    operator.dataproc_properties.update(spark_envs)
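

# Minimal sketch (an assumption, not part of the original module): one way these
# trackers could be dispatched by operator class name. The registry name, the helper
# below, and the exact set of supported operators are illustrative only.
_TRACK_BY_OPERATOR_CLASS = {
    "EmrAddStepsOperator": track_emr_add_steps_operator,
    "SparkSubmitOperator": track_spark_submit_operator,
    "DataProcPySparkOperator": track_data_proc_pyspark_operator,
}


def track_operator(operator, tracking_info):
    # Look up a tracker by the operator's class name; unknown operators are skipped.
    tracker = _TRACK_BY_OPERATOR_CLASS.get(operator.__class__.__name__)
    if tracker is not None:
        tracker(operator, tracking_info)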