def test_spark_hook(self, _, __, ___, ____, _____, ______, current_task_run, mock_hook):
    """The spark-submit hook must be invoked once with kwargs mirroring the task's spark_config."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    spark_conf = current_task_run.task.spark_config
    word_count_inline.t(text=__file__).dbnd_run()

    # Every hook kwarg must come straight from the task-level spark configuration.
    mock_hook.assert_called_once_with(
        application_args=[],
        conf=spark_conf.conf,
        conn_id="spark_default",
        driver_class_path=spark_conf.driver_class_path,
        driver_memory=spark_conf.driver_memory,
        env_vars={
            "DBND_TASK_RUN_ATTEMPT_UID": str(current_task_run.task_run_attempt_uid)
        },
        exclude_packages=spark_conf.exclude_packages,
        executor_cores=spark_conf.executor_cores,
        executor_memory=spark_conf.executor_memory,
        files="",
        jars=str(spark_conf.main_jar),
        java_class=current_task_run.task.main_class,
        keytab=spark_conf.keytab,
        name=current_task_run.job_id,
        num_executors=spark_conf.num_executors,
        packages=spark_conf.packages,
        principal=spark_conf.principal,
        py_files="",
        repositories=spark_conf.repositories,
        total_executor_cores=spark_conf.total_executor_cores,
        verbose=spark_conf.verbose,
    )
def test_spark_inline_same_context(self):
    """Run the inline word-count task reusing an externally created SparkSession."""
    from pyspark.sql import SparkSession

    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    with SparkSession.builder.getOrCreate() as session:  # session reused in-place by dbnd
        with config({SparkLocalEngineConfig.enable_spark_context_inplace: True}):
            assert_run_task(word_count_inline.t(text=__file__))
def test_word_count_inline(self):
    """Run the inline word-count task on TEXT_FILE with tracker API disabled."""
    # Import locally, consistent with the sibling word-count tests, so the
    # (heavy) spark scenario module is only loaded when this test runs.
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    with dbnd_config(disable_tracker_api()):
        assert_run_task(
            word_count_inline.t(
                text=TEXT_FILE,
                # random version forces a fresh (non-cached) task run
                task_version=str(random.random()),
                override=conf_override,
            )
        )
def test_word_count_inline(self):
    """Run the inline word-count task on the text path taken from livy test config."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    task = word_count_inline.t(
        text=config.get("livy_tests", "text"),
        # random version forces a fresh (non-cached) task run
        task_version=str(random.random()),
        override=conf_override,
    )
    assert_run_task(task)
def test_word_count_inline(self):
    """Run the inline word-count task against TEXT_FILE."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    task = word_count_inline.t(
        text=TEXT_FILE,
        # random version forces a fresh (non-cached) task run
        task_version=str(random.random()),
        override=conf_override,
    )
    assert_run_task(task)
def test_spark_inline(self):
    """Run the inline word-count task with PYTHONPATH exported for spark-submit."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    # The "tests" module name clashes on pickle loading after spark-submit,
    # so point PYTHONPATH two levels above this file.
    tests_parent = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    with dbnd_config({SparkConfig.env_vars: {"PYTHONPATH": tests_parent}}):
        assert_run_task(word_count_inline.t(text=__file__))
def test_spark_hook(self, _, __, ___, ____, _____, current_task_run, mock_hook):
    """The spark-submit hook must receive kwargs matching the task's spark_config."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    word_count_inline.t(text=__file__).dbnd_run()

    spark_conf = current_task_run.task.spark_config
    # The hook call must mirror the task-level spark configuration;
    # env_vars is matched loosely here and pinned below.
    mock_hook.assert_called_once_with(
        application_args=[],
        conf=spark_conf.conf,
        conn_id="spark_default",
        driver_class_path=spark_conf.driver_class_path,
        driver_memory=spark_conf.driver_memory,
        env_vars=mock.ANY,
        exclude_packages=spark_conf.exclude_packages,
        executor_cores=spark_conf.executor_cores,
        executor_memory=spark_conf.executor_memory,
        files="",
        jars=str(spark_conf.main_jar),
        java_class=current_task_run.task.main_class,
        keytab=spark_conf.keytab,
        name=current_task_run.job_id,
        num_executors=spark_conf.num_executors,
        packages=spark_conf.packages,
        principal=spark_conf.principal,
        py_files="",
        repositories=spark_conf.repositories,
        total_executor_cores=spark_conf.total_executor_cores,
        verbose=spark_conf.verbose,
    )

    # env_vars carries more entries; only the attempt-uid key is asserted exactly.
    passed_env_vars = mock_hook.call_args_list[0].kwargs["env_vars"]
    assert passed_env_vars["DBND_TASK_RUN_ATTEMPT_UID"] == str(
        current_task_run.task_run_attempt_uid
    )
def test_spark_complete(self, monkeypatch):
    """_complete() must report False when a directory output does not exist."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    # The "tests" module name clashes on pickle loading after spark-submit,
    # so point PYTHONPATH two levels above this file.
    tests_parent = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    with dbnd_config({SparkConfig.env_vars: {"PYTHONPATH": tests_parent}}):
        task = word_count_inline.t(text=__file__)

        # Fake a directory output that reports itself as missing.
        missing_dir = mock.MagicMock(DirTarget)
        missing_dir.exists = mock.Mock(return_value=False)
        monkeypatch.setattr(task, "_get_dir_outputs", lambda: [missing_dir])

        assert task._complete() is False
def test_spark_inline(self):
    """The inline word-count task runs successfully end to end."""
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    assert_run_task(word_count_inline.t(text=__file__))