def test_process_spark_submit_log_k8s(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_k8s_cluster')
        log_lines = [
            'INFO  LoggingPodStatusWatcherImpl:54 - State changed, new state:' +
            'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
            'namespace: default' +
            'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' +
            'spark-role -> driver' +
            'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' +
            'creation time: 2018-03-05T10:26:55Z' +
            'service account name: spark' +
            'volumes: spark-init-properties, download-jars-volume,' +
            'download-files-volume, spark-token-2vmlm' +
            'node name: N/A' +
            'start time: N/A' +
            'container images: N/A' +
            'phase: Pending' +
            'status: []' +
            '2018-03-05 11:26:56 INFO  LoggingPodStatusWatcherImpl:54 - State changed,' +
            ' new state:' +
            'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
            'namespace: default' +
            'Exit code: 999'
        ]

        # When
        hook._process_spark_submit_log(log_lines)

        # Then
        self.assertEqual(hook._kubernetes_driver_pod,
                         'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver')
        self.assertEqual(hook._spark_exit_code, 999)
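
The two assertions above imply that the hook scans each line of spark-submit's Kubernetes output for the driver pod name and the container exit code. A minimal standalone sketch of that kind of parsing; the function name and regular expressions are assumptions for illustration, not the hook's actual implementation:

import re


def parse_k8s_submit_log(log_lines):
    """Extract the driver pod name and exit code from spark-submit Kubernetes output."""
    driver_pod, exit_code = None, None
    for line in log_lines:
        pod_match = re.search(r'pod name: (\S+-driver)', line)
        if pod_match:
            driver_pod = pod_match.group(1)
        exit_match = re.search(r'Exit code: (\d+)', line)
        if exit_match:
            exit_code = int(exit_match.group(1))
    return driver_pod, exit_code
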
    def test_yarn_process_on_kill(self, mock_popen):
        # Given
        mock_popen.return_value.stdout = io.StringIO('stdout')
        mock_popen.return_value.stderr = io.StringIO('stderr')
        mock_popen.return_value.poll.return_value = None
        mock_popen.return_value.wait.return_value = 0
        log_lines = [
            'SPARK_MAJOR_VERSION is set to 2, using Spark2',
            'WARN NativeCodeLoader: Unable to load native-hadoop library for your '
            + 'platform... using builtin-java classes where applicable',
            'WARN DomainSocketFactory: The short-circuit local reads feature cannot '
            + 'be used because libhadoop cannot be loaded.',
            'INFO Client: Requesting a new application from cluster with 10 ' +
            'NodeManagerapplication_1486558679801_1820s',
            'INFO Client: Submitting application application_1486558679801_1820 '
            + 'to ResourceManager'
        ]
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
        hook._process_spark_submit_log(log_lines)
        hook.submit()

        # When
        hook.on_kill()

        # Then
        self.assertIn(
            call(['yarn', 'application', '-kill',
                  'application_1486558679801_1820'],
                 stderr=-1, stdout=-1),
            mock_popen.mock_calls)
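
The assertion pins the exact subprocess call used to kill the tracked YARN application. A small sketch of issuing that same call outside the hook (the helper name is made up; the -1 values in the assertion are `subprocess.PIPE`):

import subprocess


def kill_yarn_application(application_id):
    # Same call shape as asserted above: `yarn application -kill <app id>`,
    # with stdout/stderr captured via pipes.
    process = subprocess.Popen(
        ['yarn', 'application', '-kill', application_id],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    return process.returncode, stdout, stderr
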
    def test_build_spark_submit_command(self):
        # Given
        hook = SparkSubmitHook(**self._config)

        # When
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        expected_build_cmd = [
            'spark-submit', '--master', 'yarn', '--conf',
            'parquet.compression=SNAPPY', '--files', 'hive-site.xml',
            '--py-files', 'sample_library.py', '--archives',
            'sample_archive.zip#SAMPLE', '--jars', 'parquet.jar', '--packages',
            'com.databricks:spark-avro_2.11:3.2.0', '--exclude-packages',
            'org.bad.dependency:1.0.0', '--repositories', 'http://myrepo.org',
            '--num-executors', '10', '--total-executor-cores', '4',
            '--executor-cores', '4', '--executor-memory', '22g',
            '--driver-memory', '3g', '--keytab', 'privileged_user.keytab',
            '--principal', 'user/[email protected]', '--proxy-user',
            'sample_user', '--name', 'spark-job', '--class',
            'com.foo.bar.AppMain', '--verbose', 'test_application.py', '-f',
            'foo', '--bar', 'bar', '--with-spaces',
            'args should keep embdedded spaces', 'baz'
        ]
        self.assertEqual(expected_build_cmd, cmd)

    def test_resolve_spark_submit_env_vars_standalone_cluster_mode(self):

        def env_vars_exception_in_standalone_cluster_mode():
            # Given
            hook = SparkSubmitHook(conn_id='spark_standalone_cluster',
                                   env_vars={"bar": "foo"})

            # When
            hook._build_spark_submit_command(self._spark_job_file)

        # Then: env_vars cannot be forwarded in standalone cluster mode,
        # so building the command is expected to raise.
        self.assertRaises(AirflowException,
                          env_vars_exception_in_standalone_cluster_mode)

Example #5

    def test_build_command(self):
        # Given
        hook = SparkSubmitHook(**self._config)

        # When
        cmd = hook._build_command(self._spark_job_file)

        # Then
        expected_build_cmd = [
            'spark-submit',
            '--master', 'yarn',
            '--conf', 'parquet.compression=SNAPPY',
            '--files', 'hive-site.xml',
            '--py-files', 'sample_library.py',
            '--jars', 'parquet.jar',
            '--num-executors', '10',
            '--total-executor-cores', '4',
            '--executor-cores', '4',
            '--executor-memory', '22g',
            '--driver-memory', '3g',
            '--keytab', 'privileged_user.keytab',
            '--principal', 'user/[email protected]',
            '--name', 'spark-job',
            '--class', 'com.foo.bar.AppMain',
            '--verbose',
            'test_application.py',
            '-f', 'foo',
            '--bar', 'bar',
            '--with-spaces', 'args should keep embdedded spaces',
            'baz'
        ]
        self.assertEqual(expected_build_cmd, cmd)

Example #6

    def execute(self, context):
        """
        Call the SparkSubmitHook to run the provided spark job
        """
        self._hook = SparkSubmitHook(
            conf=self._conf,
            conn_id=self._conn_id,
            files=self._files,
            py_files=self._py_files,
            archives=self._archives,
            driver_class_path=self._driver_class_path,
            jars=self._jars,
            java_class=self._java_class,
            packages=self._packages,
            exclude_packages=self._exclude_packages,
            repositories=self._repositories,
            total_executor_cores=self._total_executor_cores,
            executor_cores=self._executor_cores,
            executor_memory=self._executor_memory,
            driver_memory=self._driver_memory,
            keytab=self._keytab,
            principal=self._principal,
            proxy_user=self._proxy_user,
            name=self._name,
            num_executors=self._num_executors,
            status_poll_interval=self._status_poll_interval,
            application_args=self._application_args,
            env_vars=self._env_vars,
            verbose=self._verbose,
            spark_binary=self._spark_binary)
        self._hook.submit(self._application)
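
For context, a minimal usage sketch of the operator whose `execute` is shown above, wired into a DAG. The import path assumes the contrib-era layout; the application path and connection id are placeholders, not values from the source:

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator

with DAG(dag_id='spark_submit_example',
         start_date=datetime(2018, 1, 1),
         schedule_interval=None) as dag:
    submit_job = SparkSubmitOperator(
        task_id='submit_spark_job',
        application='/path/to/test_application.py',  # placeholder path
        conn_id='spark_default',
        name='spark-job',
        verbose=True)
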
    def execute(self, context):
        """
        Call the SparkSubmitHook to run the provided spark job
        """
        self._hook = SparkSubmitHook(
            conf=self._conf,
            conn_id=self._conn_id,
            files=self._files,
            py_files=self._py_files,
            jars=self._jars,
            java_class=self._java_class,
            packages=self._packages,
            exclude_packages=self._exclude_packages,
            repositories=self._repositories,
            total_executor_cores=self._total_executor_cores,
            executor_cores=self._executor_cores,
            executor_memory=self._executor_memory,
            driver_memory=self._driver_memory,
            keytab=self._keytab,
            principal=self._principal,
            name=self._name,
            num_executors=self._num_executors,
            application_args=self._application_args,
            verbose=self._verbose)
        self._hook.submit(self._application)

Example #8

    def test_submit(self):
        hook = SparkSubmitHook()

        # We don't have spark-submit available, and this is hard to mock, so just accept
        # an exception for now.
        with self.assertRaises(AirflowException):
            hook.submit(self._spark_job_file)

Example #9

    def test_build_command(self):
        hook = SparkSubmitHook(**self._config)

        # The subprocess requires an array but we build the cmd by joining on a space
        cmd = ' '.join(hook._build_command(self._spark_job_file))

        # Check if the URL gets built properly and everything exists.
        assert self._spark_job_file in cmd

        # Check all the parameters
        assert "--files {}".format(self._config['files']) in cmd
        assert "--py-files {}".format(self._config['py_files']) in cmd
        assert "--jars {}".format(self._config['jars']) in cmd
        assert "--executor-cores {}".format(
            self._config['executor_cores']) in cmd
        assert "--executor-memory {}".format(
            self._config['executor_memory']) in cmd
        assert "--keytab {}".format(self._config['keytab']) in cmd
        assert "--principal {}".format(self._config['principal']) in cmd
        assert "--name {}".format(self._config['name']) in cmd
        assert "--num-executors {}".format(
            self._config['num_executors']) in cmd

        # Check if all config settings are there
        for k in self._config['conf']:
            assert "--conf {0}={1}".format(k, self._config['conf'][k]) in cmd

        if self._config['verbose']:
            assert "--verbose" in cmd

Example #10

    def _run_spark_submit(self, application, jars):
        assert_airflow_package_installed()
        from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook
        from airflow.exceptions import AirflowException

        # task_env = get_cloud_config(Clouds.local)
        spark_local_config = SparkLocalEngineConfig()
        _config = self.config
        deploy = self.deploy

        spark = SparkSubmitHook(
            conf=_config.conf,
            conn_id=spark_local_config.conn_id,
            name=self.job.job_id,
            application_args=list_of_strings(self.task.application_args()),
            java_class=self.task.main_class,
            files=deploy.arg_files(_config.files),
            py_files=deploy.arg_files(self.task.get_py_files()),
            driver_class_path=_config.driver_class_path,
            jars=deploy.arg_files(jars),
            packages=_config.packages,
            exclude_packages=_config.exclude_packages,
            repositories=_config.repositories,
            total_executor_cores=_config.total_executor_cores,
            executor_cores=_config.executor_cores,
            executor_memory=_config.executor_memory,
            driver_memory=_config.driver_memory,
            keytab=_config.keytab,
            principal=_config.principal,
            num_executors=_config.num_executors,
            env_vars=self._get_env_vars(),
            verbose=_config.verbose,
        )

        log_buffer = StringIO()
        with log_buffer as lb:
            dbnd_log_handler = self._capture_submit_log(spark, lb)
            try:
                spark.submit(application=application)
            except AirflowException as ex:
                return_code = self._get_spark_return_code_from_exception(ex)
                if return_code != "0":
                    error_snippets = parse_spark_log_safe(
                        log_buffer.getvalue().split(os.linesep))
                    raise failed_to_run_spark_script(
                        self,
                        spark._build_spark_submit_command(
                            application=application),
                        application,
                        return_code,
                        error_snippets,
                    )
                else:
                    raise failed_spark_status(ex)
            finally:
                # Detach only the handler that was attached to capture the submit log.
                spark.log.handlers = [
                    h for h in spark.log.handlers if h is not dbnd_log_handler
                ]
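
The submit log here is captured by temporarily attaching a handler to the hook's logger and detaching it in the `finally` block. A generic sketch of that capture pattern (the helper name is made up; `_capture_submit_log` itself is not shown in this snippet):

import logging
from io import StringIO


def attach_capture_handler(logger):
    """Attach a StreamHandler that mirrors the logger's output into an in-memory buffer."""
    buffer = StringIO()
    handler = logging.StreamHandler(buffer)
    logger.addHandler(handler)
    return handler, buffer
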

Example #11

    def test_submit(self, mock_process):
        # We don't have spark-submit available, and this is hard to mock, so let's
        # just use this simple mock.
        mock_Popen = mock_process.Popen.return_value
        mock_Popen.stdout = StringIO(u'stdout')
        mock_Popen.stderr = StringIO(u'stderr')
        mock_Popen.returncode = None
        mock_Popen.communicate.return_value = ['extra stdout', 'extra stderr']
        hook = SparkSubmitHook()
        hook.submit(self._spark_job_file)

    def test_resolve_spark_submit_env_vars_standalone_client_mode(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_standalone_cluster_client_mode',
                               env_vars={"bar": "foo"})

        # When
        hook._build_spark_submit_command(self._spark_job_file)

        # Then
        self.assertEqual(hook._env, {"bar": "foo"})
    def test_resolve_spark_submit_env_vars_k8s(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_k8s_cluster',
                               env_vars={"bar": "foo"})

        # When
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        self.assertEqual(cmd[4], "spark.kubernetes.driverEnv.bar=foo")
    def test_resolve_should_track_driver_status(self):
        # Given
        hook_default = SparkSubmitHook(conn_id='')
        hook_spark_yarn_cluster = SparkSubmitHook(conn_id='spark_yarn_cluster')
        hook_spark_k8s_cluster = SparkSubmitHook(conn_id='spark_k8s_cluster')
        hook_spark_default_mesos = SparkSubmitHook(
            conn_id='spark_default_mesos')
        hook_spark_home_set = SparkSubmitHook(conn_id='spark_home_set')
        hook_spark_home_not_set = SparkSubmitHook(conn_id='spark_home_not_set')
        hook_spark_binary_set = SparkSubmitHook(conn_id='spark_binary_set')
        hook_spark_binary_and_home_set = SparkSubmitHook(
            conn_id='spark_binary_and_home_set')
        hook_spark_standalone_cluster = SparkSubmitHook(
            conn_id='spark_standalone_cluster')

        # When
        should_track_driver_status_default = hook_default \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_yarn_cluster = hook_spark_yarn_cluster \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_k8s_cluster = hook_spark_k8s_cluster \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_default_mesos = hook_spark_default_mesos \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_home_set = hook_spark_home_set \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_home_not_set = hook_spark_home_not_set \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_binary_set = hook_spark_binary_set \
            ._resolve_should_track_driver_status()
        should_track_driver_status_spark_binary_and_home_set = \
            hook_spark_binary_and_home_set._resolve_should_track_driver_status()
        should_track_driver_status_spark_standalone_cluster = \
            hook_spark_standalone_cluster._resolve_should_track_driver_status()

        # Then
        self.assertEqual(should_track_driver_status_default, False)
        self.assertEqual(should_track_driver_status_spark_yarn_cluster, False)
        self.assertEqual(should_track_driver_status_spark_k8s_cluster, False)
        self.assertEqual(should_track_driver_status_spark_default_mesos, False)
        self.assertEqual(should_track_driver_status_spark_home_set, False)
        self.assertEqual(should_track_driver_status_spark_home_not_set, False)
        self.assertEqual(should_track_driver_status_spark_binary_set, False)
        self.assertEqual(should_track_driver_status_spark_binary_and_home_set,
                         False)
        self.assertEqual(should_track_driver_status_spark_standalone_cluster,
                         True)
    def test_resolve_spark_submit_env_vars_yarn(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster',
                               env_vars={"bar": "foo"})

        # When
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        self.assertEqual(cmd[4], "spark.yarn.appMasterEnv.bar=foo")
        self.assertEqual(hook._env, {"bar": "foo"})
    def test_spark_process_runcmd(self, mock_popen):
        # Given
        mock_popen.return_value.stdout = six.StringIO('stdout')
        mock_popen.return_value.stderr = six.StringIO('stderr')
        mock_popen.return_value.wait.return_value = 0

        # When
        hook = SparkSubmitHook(conn_id='')
        hook.submit()

        # Then
        self.assertEqual(
            mock_popen.mock_calls[0],
            call(['spark-submit', '--master', 'yarn', '--name', 'default-name', ''],
                 stderr=-2, stdout=-1, universal_newlines=True, bufsize=-1))
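
The magic numbers pinned in this assertion (and the one above) are simply the `subprocess` module constants:

import subprocess

# -1 and -2 in the asserted Popen calls correspond to these constants.
assert subprocess.PIPE == -1
assert subprocess.STDOUT == -2
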

Example #17

    def test_resolve_connection_spark_home_not_set_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_home_not_set')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_command(self._spark_job_file)

        # Then
        self.assertSequenceEqual(connection,
                                 ('yarn://yarn-master', None, None, None))
        self.assertEqual(cmd[0], 'spark-submit')

Example #18

    def test_resolve_connection_yarn_default(self):
        # Given
        hook = SparkSubmitHook(conn_id='')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        self.assertSequenceEqual(connection, ('yarn', None, None, None))
        self.assertEqual(dict_cmd["--master"], "yarn")
    def test_spark_process_runcmd(self, mock_popen):
        # Given
        mock_popen.return_value.stdout = StringIO(u'stdout')
        mock_popen.return_value.stderr = StringIO(u'stderr')
        mock_popen.return_value.returncode = 0
        mock_popen.return_value.communicate.return_value = [
            StringIO(u'stdout\nstdout'), StringIO(u'stderr\nstderr')]

        # When
        hook = SparkSubmitHook(conn_id='')
        hook.submit()

        # Then
        self.assertEqual(
            mock_popen.mock_calls[0],
            call(['spark-submit', '--master', 'yarn', '--name', 'default-name', ''],
                 stderr=-1, stdout=-1))

Example #20

    def test_resolve_connection_mesos_default_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_default_mesos')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        self.assertSequenceEqual(connection,
                                 ('mesos://host:5050', None, None, None))
        self.assertEqual(dict_cmd["--master"], "mesos://host:5050")
    def test_resolve_connection(self):

        # Default to the standard yarn connection because the conn_id does not exist
        hook = SparkSubmitHook(conn_id='')
        self.assertEqual(hook._resolve_connection(), ('yarn', None, None, None))
        assert "--master yarn" in ' '.join(hook._build_command(self._spark_job_file))

        # Default to the standard yarn connection
        hook = SparkSubmitHook(conn_id='spark_default')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn', 'root.default', None, None)
        )
        cmd = ' '.join(hook._build_command(self._spark_job_file))
        assert "--master yarn" in cmd
        assert "--queue root.default" in cmd

        # Connect to a mesos master
        hook = SparkSubmitHook(conn_id='spark_default_mesos')
        self.assertEqual(
            hook._resolve_connection(),
            ('mesos://host:5050', None, None, None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        assert "--master mesos://host:5050" in cmd

        # Set specific queue and deploy mode
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', 'root.etl', 'cluster', None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        assert "--master yarn://yarn-master" in cmd
        assert "--queue root.etl" in cmd
        assert "--deploy-mode cluster" in cmd

        # Set the spark home
        hook = SparkSubmitHook(conn_id='spark_home_set')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', None, None, '/opt/myspark')
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        assert cmd.startswith('/opt/myspark/bin/spark-submit')

        # Spark home not set
        hook = SparkSubmitHook(conn_id='spark_home_not_set')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', None, None, None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        assert cmd.startswith('spark-submit')

Example #22

    def test_resolve_connection_spark_yarn_cluster_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        self.assertSequenceEqual(
            connection, ('yarn://yarn-master', 'root.etl', 'cluster', None))
        self.assertEqual(dict_cmd["--master"], "yarn://yarn-master")
        self.assertEqual(dict_cmd["--queue"], "root.etl")
        self.assertEqual(dict_cmd["--deploy-mode"], "cluster")

Example #23

    def test_process_log(self):
        # Must select yarn connection
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')

        log_lines = [
            'SPARK_MAJOR_VERSION is set to 2, using Spark2',
            'WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable',
            'WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.',
            'INFO Client: Requesting a new application from cluster with 10 NodeManagers',
            'INFO Client: Submitting application application_1486558679801_1820 to ResourceManager'
        ]

        hook._process_log(log_lines)

        assert hook._yarn_application_id == 'application_1486558679801_1820'
    def test_process_spark_submit_log_standalone_cluster(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_standalone_cluster')
        log_lines = [
            'Running Spark using the REST application submission protocol.',
            '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request '
            'to launch an application in spark://spark-standalone-master:6066',
            '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' +
            'created as driver-20171128111415-0001. Polling submission state...'
        ]
        # When
        hook._process_spark_submit_log(log_lines)

        # Then
        self.assertEqual(hook._driver_id, 'driver-20171128111415-0001')
    def test_resolve_connection_spark_standalone_cluster_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_standalone_cluster')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        expected_spark_connection = {"master": "spark://spark-standalone-master:6066",
                                     "spark_binary": "spark-submit",
                                     "deploy_mode": "cluster",
                                     "queue": None,
                                     "spark_home": "/path/to/spark_home"}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(cmd[0], '/path/to/spark_home/bin/spark-submit')
    def test_resolve_connection_spark_binary_set_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_binary_set')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        expected_spark_connection = {"master": "yarn",
                                     "spark_binary": "custom-spark-submit",
                                     "deploy_mode": None,
                                     "queue": None,
                                     "spark_home": None}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(cmd[0], 'custom-spark-submit')
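
Together with the standalone-cluster test above, this shows how the submit binary path is resolved: an explicit `spark_home` is joined with `bin/<binary>`, a custom `spark_binary` replaces the default name, and otherwise plain `spark-submit` from the PATH is used. A small sketch of that resolution (the helper name is assumed):

import os


def resolve_spark_submit_binary(spark_home=None, spark_binary=None):
    # An explicit spark_home wins and is joined with bin/<binary>;
    # otherwise the (possibly custom) binary name is resolved from the PATH.
    binary = spark_binary or 'spark-submit'
    if spark_home:
        return os.path.join(spark_home, 'bin', binary)
    return binary
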
    def test_resolve_connection_mesos_default_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_default_mesos')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        expected_spark_connection = {"master": "mesos://host:5050",
                                     "spark_binary": "spark-submit",
                                     "deploy_mode": None,
                                     "queue": None,
                                     "spark_home": None}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(dict_cmd["--master"], "mesos://host:5050")

Example #28

    def test_resolve_connection_spark_binary_default_value(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_default')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        expected_spark_connection = {"master": "yarn",
                                     "spark_binary": "spark-submit",
                                     "deploy_mode": None,
                                     "queue": 'root.default',
                                     "spark_home": None,
                                     "namespace": 'default'}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(cmd[0], 'spark-submit')

Example #29

    def test_resolve_connection_yarn_default(self):
        # Given
        hook = SparkSubmitHook(conn_id='')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        expected_spark_connection = {"master": "yarn",
                                     "spark_binary": "spark-submit",
                                     "deploy_mode": None,
                                     "queue": None,
                                     "spark_home": None,
                                     "namespace": 'default'}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(dict_cmd["--master"], "yarn")
    def test_resolve_connection_spark_yarn_cluster_connection(self):
        # Given
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_spark_submit_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        expected_spark_connection = {"master": "yarn://yarn-master",
                                     "spark_binary": "spark-submit",
                                     "deploy_mode": "cluster",
                                     "queue": "root.etl",
                                     "spark_home": None}
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(dict_cmd["--master"], "yarn://yarn-master")
        self.assertEqual(dict_cmd["--queue"], "root.etl")
        self.assertEqual(dict_cmd["--deploy-mode"], "cluster")