Esempio n. 1
0
    def test_resolve_connection(self):
        """Check resolved connections and the spark-submit options they yield.

        For each connection id the tuple returned by ``_resolve_connection``
        is compared with the expected value, and the space-joined result of
        ``_build_command`` is searched for the matching command-line options.
        """
        # unittest assertion methods are used instead of bare ``assert`` so
        # the checks are not stripped under ``python -O`` and a failure
        # reports both the expected fragment and the actual command.

        # Default to the standard yarn connection because conn_id does not
        # exist
        hook = SparkSubmitHook(conn_id='')
        self.assertEqual(hook._resolve_connection(), ('yarn', None, None))
        self.assertIn(
            "--master yarn",
            ' '.join(hook._build_command(self._spark_job_file)))

        # Default to the standard yarn connection
        hook = SparkSubmitHook(conn_id='spark_default')
        self.assertEqual(hook._resolve_connection(),
                         ('yarn', 'root.default', None))
        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master yarn", cmd)
        self.assertIn("--queue root.default", cmd)

        # Connect to a mesos master
        hook = SparkSubmitHook(conn_id='spark_default_mesos')
        self.assertEqual(hook._resolve_connection(),
                         ('mesos://host:5050', None, None))

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master mesos://host:5050", cmd)

        # Set specific queue and deploy mode
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
        self.assertEqual(hook._resolve_connection(),
                         ('yarn://yarn-master', 'root.etl', 'cluster'))

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master yarn://yarn-master", cmd)
        self.assertIn("--queue root.etl", cmd)
        self.assertIn("--deploy-mode cluster", cmd)
    def test_resolve_connection_spark_k8s_cluster_ns_conf(self):
        """Check the k8s connection when the namespace is passed via ``conf``.

        The hook is created with ``spark.kubernetes.namespace`` set to
        ``airflow``; the test expects that value both in the resolved
        connection and in the ``--conf`` argument of the built command.
        """
        # Given: the namespace is supplied directly as a config option
        spark = SparkSubmitHook(
            conn_id='spark_k8s_cluster',
            conf={'spark.kubernetes.namespace': 'airflow'},
        )

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = {
            "master": "k8s://https://k8s-master",
            "deploy_mode": "cluster",
            "namespace": "airflow",
            "queue": None,
            "spark_binary": "spark-submit",
            "spark_home": "/opt/spark",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(flags["--master"], "k8s://https://k8s-master")
        self.assertEqual(flags["--deploy-mode"], "cluster")
        self.assertEqual(
            flags["--conf"], "spark.kubernetes.namespace=airflow")
Esempio n. 3
0
    def test_resolve_connection_spark_home_not_set_connection(self):
        """Check the ``spark_home_not_set`` connection.

        Expects the resolved connection sequence to carry only the master
        and the first element of the built command to be the bare
        ``spark-submit`` executable name.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_home_not_set')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_command(self._spark_job_file)

        # Then
        expected = ('yarn://yarn-master', None, None, None)
        self.assertSequenceEqual(resolved, expected)
        self.assertEqual(argv[0], 'spark-submit')
Esempio n. 4
0
    def test_resolve_connection_yarn_default(self):
        """Check that an empty ``conn_id`` resolves to a plain yarn master.

        Expects the resolved connection sequence to be ``yarn`` followed by
        three ``None`` entries and ``--master`` in the command to be ``yarn``.
        """
        # Given
        spark = SparkSubmitHook(conn_id='')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = ('yarn', None, None, None)
        self.assertSequenceEqual(resolved, expected)
        self.assertEqual(flags["--master"], "yarn")
Esempio n. 5
0
    def test_resolve_connection_mesos_default_connection(self):
        """Check the ``spark_default_mesos`` connection.

        Expects ``mesos://host:5050`` as the first element of the resolved
        connection sequence and as the ``--master`` value of the command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_default_mesos')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = ('mesos://host:5050', None, None, None)
        self.assertSequenceEqual(resolved, expected)
        self.assertEqual(flags["--master"], "mesos://host:5050")
Esempio n. 6
0
    def test_resolve_connection(self):
        """Check resolved connections and the spark-submit command they yield.

        For each connection id the 4-tuple returned by
        ``_resolve_connection`` is compared with the expected value, and the
        space-joined result of ``_build_command`` is checked for the matching
        options or for the expected executable prefix.
        """
        # unittest assertion methods are used instead of bare ``assert`` so
        # the checks are not stripped under ``python -O`` and a failure
        # reports the values involved.

        # Default to the standard yarn connection because conn_id does not
        # exist
        hook = SparkSubmitHook(conn_id='')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn', None, None, None)
        )
        self.assertIn(
            "--master yarn",
            ' '.join(hook._build_command(self._spark_job_file))
        )

        # Default to the standard yarn connection
        hook = SparkSubmitHook(conn_id='spark_default')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn', 'root.default', None, None)
        )
        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master yarn", cmd)
        self.assertIn("--queue root.default", cmd)

        # Connect to a mesos master
        hook = SparkSubmitHook(conn_id='spark_default_mesos')
        self.assertEqual(
            hook._resolve_connection(),
            ('mesos://host:5050', None, None, None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master mesos://host:5050", cmd)

        # Set specific queue and deploy mode
        hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', 'root.etl', 'cluster', None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertIn("--master yarn://yarn-master", cmd)
        self.assertIn("--queue root.etl", cmd)
        self.assertIn("--deploy-mode cluster", cmd)

        # Set the spark home
        hook = SparkSubmitHook(conn_id='spark_home_set')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', None, None, '/opt/myspark')
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertTrue(cmd.startswith('/opt/myspark/bin/spark-submit'))

        # Spark home not set
        hook = SparkSubmitHook(conn_id='spark_home_not_set')
        self.assertEqual(
            hook._resolve_connection(),
            ('yarn://yarn-master', None, None, None)
        )

        cmd = ' '.join(hook._build_command(self._spark_job_file))
        self.assertTrue(cmd.startswith('spark-submit'))
Esempio n. 7
0
    def test_resolve_connection_spark_yarn_cluster_connection(self):
        """Check the ``spark_yarn_cluster`` connection.

        Expects the resolved connection sequence to hold the master, queue
        and deploy mode, and the same three values to appear as
        ``--master``, ``--queue`` and ``--deploy-mode`` in the command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_yarn_cluster')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = ('yarn://yarn-master', 'root.etl', 'cluster', None)
        self.assertSequenceEqual(resolved, expected)
        self.assertEqual(flags["--master"], "yarn://yarn-master")
        self.assertEqual(flags["--queue"], "root.etl")
        self.assertEqual(flags["--deploy-mode"], "cluster")
    def test_resolve_connection_spark_binary_set_connection(self):
        """Check the ``spark_binary_set`` connection.

        Expects ``custom-spark-submit`` as the ``spark_binary`` entry of the
        resolved connection and as the first element of the built command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_binary_set')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        expected = {
            "master": "yarn",
            "queue": None,
            "deploy_mode": None,
            "spark_home": None,
            "spark_binary": "custom-spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(argv[0], 'custom-spark-submit')
    def test_resolve_connection_spark_standalone_cluster_connection(self):
        """Check the ``spark_standalone_cluster`` connection.

        Expects the resolved connection to carry the standalone master URL,
        cluster deploy mode and a Spark home, and the first command element
        to be ``bin/spark-submit`` under that Spark home.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_standalone_cluster')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        expected = {
            "master": "spark://spark-standalone-master:6066",
            "queue": None,
            "deploy_mode": "cluster",
            "spark_home": "/path/to/spark_home",
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(argv[0], '/path/to/spark_home/bin/spark-submit')
    def test_resolve_connection_mesos_default_connection(self):
        """Check the ``spark_default_mesos`` connection.

        Expects ``mesos://host:5050`` as the ``master`` entry of the
        resolved connection and as the ``--master`` value of the command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_default_mesos')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = {
            "master": "mesos://host:5050",
            "queue": None,
            "deploy_mode": None,
            "spark_home": None,
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(flags["--master"], "mesos://host:5050")
    def test_resolve_connection_spark_home_set_connection(self):
        """Check the ``spark_home_set`` connection.

        Expects ``/opt/myspark`` as the ``spark_home`` entry of the resolved
        connection and the first command element to be ``bin/spark-submit``
        under that directory.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_home_set')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        expected = {
            "master": "yarn://yarn-master",
            "queue": None,
            "deploy_mode": None,
            "namespace": 'default',
            "spark_home": "/opt/myspark",
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(argv[0], '/opt/myspark/bin/spark-submit')
Esempio n. 12
0
    def test_resolve_connection_spark_binary_default_value(self):
        """Check the ``spark_default`` connection's binary.

        Expects ``spark-submit`` as the ``spark_binary`` entry of the
        resolved connection and as the first element of the built command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_default')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        expected = {
            "master": "yarn",
            "queue": 'root.default',
            "deploy_mode": None,
            "namespace": 'default',
            "spark_home": None,
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(argv[0], 'spark-submit')
Esempio n. 13
0
    def test_resolve_connection_yarn_default(self):
        """Check that an empty ``conn_id`` resolves to a plain yarn master.

        Expects the resolved connection to name ``yarn`` as master with the
        ``default`` namespace, and ``--master`` in the command to be ``yarn``.
        """
        # Given
        spark = SparkSubmitHook(conn_id='')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = {
            "master": "yarn",
            "queue": None,
            "deploy_mode": None,
            "namespace": 'default',
            "spark_home": None,
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(flags["--master"], "yarn")
    def test_resolve_connection_yarn_default(self):
        """Check the connection resolved for an empty ``conn_id``.

        The resolved connection must equal the expected yarn settings and
        the built command must pass ``yarn`` as its ``--master`` value.
        """
        # Given
        default_hook = SparkSubmitHook(conn_id='')

        # When
        actual_connection = default_hook._resolve_connection()
        submit_args = default_hook._build_spark_submit_command(
            self._spark_job_file)

        # Then
        parsed_args = self.cmd_args_to_dict(submit_args)
        self.assertEqual(
            actual_connection,
            {
                "spark_binary": "spark-submit",
                "spark_home": None,
                "master": "yarn",
                "deploy_mode": None,
                "queue": None,
                "namespace": 'default',
            },
        )
        self.assertEqual(parsed_args["--master"], "yarn")
    def test_resolve_connection_spark_yarn_cluster_connection(self):
        """Check the ``spark_yarn_cluster`` connection.

        Expects the resolved connection to hold the master, queue and deploy
        mode, and the same three values to appear as ``--master``,
        ``--queue`` and ``--deploy-mode`` in the built command.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_yarn_cluster')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = {
            "master": "yarn://yarn-master",
            "queue": "root.etl",
            "deploy_mode": "cluster",
            "spark_home": None,
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(flags["--master"], "yarn://yarn-master")
        self.assertEqual(flags["--queue"], "root.etl")
        self.assertEqual(flags["--deploy-mode"], "cluster")
Esempio n. 16
0
    def test_resolve_connection_spark_k8s_cluster_connection(self):
        """Check the ``spark_k8s_cluster`` connection.

        Expects the resolved connection to carry the k8s master URL, cluster
        deploy mode and the ``mynamespace`` namespace, and the command to
        pass the same master and deploy mode.
        """
        # Given
        spark = SparkSubmitHook(conn_id='spark_k8s_cluster')

        # When
        resolved = spark._resolve_connection()
        argv = spark._build_spark_submit_command(self._spark_job_file)

        # Then
        flags = self.cmd_args_to_dict(argv)
        expected = {
            "master": "k8s://https://k8s-master",
            "queue": None,
            "deploy_mode": "cluster",
            "namespace": "mynamespace",
            "spark_home": "/opt/spark",
            "spark_binary": "spark-submit",
        }
        self.assertEqual(resolved, expected)
        self.assertEqual(flags["--master"], "k8s://https://k8s-master")
        self.assertEqual(flags["--deploy-mode"], "cluster")
    def test_resolve_connection_spark_k8s_cluster_connection(self):
        """Check what the ``spark_k8s_cluster`` connection resolves to.

        The resolved connection must equal the expected k8s settings and the
        built command must carry the matching ``--master`` and
        ``--deploy-mode`` values.
        """
        # Given
        k8s_hook = SparkSubmitHook(conn_id='spark_k8s_cluster')

        # When
        actual_connection = k8s_hook._resolve_connection()
        submit_args = k8s_hook._build_spark_submit_command(
            self._spark_job_file)

        # Then
        parsed_args = self.cmd_args_to_dict(submit_args)
        self.assertEqual(
            actual_connection,
            {
                "spark_binary": "spark-submit",
                "spark_home": "/opt/spark",
                "master": "k8s://https://k8s-master",
                "deploy_mode": "cluster",
                "queue": None,
                "namespace": "mynamespace",
            },
        )
        self.assertEqual(
            parsed_args["--master"], "k8s://https://k8s-master")
        self.assertEqual(parsed_args["--deploy-mode"], "cluster")
Esempio n. 18
0
    def test_resolve_connection_mesos_cluster_env_connection(self):
        """Check a mesos connection supplied through an environment variable.

        A generated connection name is exported as an
        ``AIRFLOW_CONN_SPARK_<NAME>`` variable holding a mesos URL; the hook
        built for ``spark_<name>`` must resolve to that master and pass it
        as ``--master`` in the built command.
        """
        # Given
        conn_name = self.gen_conn_name(10)
        env_key = "AIRFLOW_CONN_SPARK_{}".format(conn_name.upper())
        # Drop the variable once the test finishes (pass or fail) so the
        # process environment does not leak into later tests.
        self.addCleanup(os.environ.pop, env_key, None)
        os.environ[env_key] = "mesos://mesos-master:5050"
        hook = SparkSubmitHook(conn_id='spark_{}'.format(conn_name))

        # When
        connection = hook._resolve_connection()
        cmd = hook._build_command(self._spark_job_file)

        # Then
        dict_cmd = self.cmd_args_to_dict(cmd)
        expected_spark_connection = {
            "master": "mesos://mesos-master:5050",
            "spark_binary": "spark-submit",
            "deploy_mode": None,
            "queue": None,
            "spark_home": None
        }
        self.assertEqual(connection, expected_spark_connection)
        self.assertEqual(dict_cmd["--master"], "mesos://mesos-master:5050")