Beispiel #1
0
    def test_exec_failure(self, db_mock_class):
        """
        Test the execute function in case where the run failed.

        The mocked ``get_run_state`` reports a ``TERMINATED``/``FAILED``
        state, so ``execute`` is expected to raise ``AirflowException``.
        Afterwards the calls recorded on the mock and the operator's
        ``run_id`` are verified.

        :param db_mock_class: mock injected into the test (presumably the
            patched Databricks hook class -- the decorator is not part of
            this snippet).
        """
        run = {
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS
        }
        op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
        db_mock = db_mock_class.return_value
        # Stub the run id with the very constant the assertions below check,
        # rather than repeating its value as a bare literal.
        db_mock.run_now.return_value = RUN_ID
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED',
                                                      '')

        with self.assertRaises(AirflowException):
            op.execute(None)

        # ``job_id`` was passed as a named parameter and must show up in the
        # payload handed to ``run_now``.
        expected = databricks_operator._deep_string_coerce({
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS,
            'job_id': JOB_ID
        })
        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)
        db_mock.run_now.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        self.assertEqual(RUN_ID, op.run_id)
Beispiel #2
0
    def test_init_with_json(self):
        """
        Test the initializer with json data.

        All top level keys, ``job_id`` included, are supplied through the
        ``json`` argument only; ``op.json`` must equal the same mapping
        after it went through ``_deep_string_coerce``.
        """
        payload = dict(
            notebook_params=NOTEBOOK_PARAMS,
            jar_params=JAR_PARAMS,
            python_params=PYTHON_PARAMS,
            spark_submit_params=SPARK_SUBMIT_PARAMS,
            job_id=JOB_ID,
        )
        operator = DatabricksRunNowOperator(task_id=TASK_ID, json=payload)

        # Built from a separate mapping so the comparison does not depend on
        # the object that was handed to the operator.
        uncoerced = dict(
            notebook_params=NOTEBOOK_PARAMS,
            jar_params=JAR_PARAMS,
            python_params=PYTHON_PARAMS,
            spark_submit_params=SPARK_SUBMIT_PARAMS,
            job_id=JOB_ID,
        )
        wanted = databricks_operator._deep_string_coerce(uncoerced)

        self.assertDictEqual(wanted, operator.json)
Beispiel #3
0
    def test_init_with_merging(self):
        """
        Test the initializer when json and other named parameters are both
        provided. The named parameters should override top level keys in the
        json dict.

        Here ``notebook_params`` appears in both places, so the named value
        is the one expected in ``op.json``.
        """
        notebook_override = {'workers': 999}
        base_json = {
            'notebook_params': NOTEBOOK_PARAMS,
            'jar_params': JAR_PARAMS,
        }

        operator = DatabricksRunNowOperator(
            task_id=TASK_ID,
            json=base_json,
            job_id=JOB_ID,
            notebook_params=notebook_override,
            python_params=PYTHON_PARAMS,
            spark_submit_params=SPARK_SUBMIT_PARAMS,
        )

        merged = {
            'notebook_params': notebook_override,  # named parameter wins
            'jar_params': JAR_PARAMS,              # only present in json
            'python_params': PYTHON_PARAMS,
            'spark_submit_params': SPARK_SUBMIT_PARAMS,
            'job_id': JOB_ID,
        }
        wanted = databricks_operator._deep_string_coerce(merged)

        self.assertDictEqual(wanted, operator.json)
Beispiel #4
0
    def test_exec_failure(self, db_mock_class):
        """
        Test the execute function in case where the run failed.

        The mocked ``get_run_state`` reports a ``TERMINATED``/``FAILED``
        state, so ``execute`` is expected to raise ``AirflowException``.
        Afterwards the calls recorded on the mock and the operator's
        ``run_id`` are verified.

        :param db_mock_class: mock injected into the test (presumably the
            patched Databricks hook class -- the decorator is not part of
            this snippet).
        """
        run = {
            'new_cluster': NEW_CLUSTER,
            'notebook_task': NOTEBOOK_TASK,
        }
        op = DatabricksSubmitRunOperator(task_id=TASK_ID, json=run)
        db_mock = db_mock_class.return_value
        # Stub the run id with the very constant the assertions below check,
        # rather than repeating its value as a bare literal.
        db_mock.submit_run.return_value = RUN_ID
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED',
                                                      '')

        with self.assertRaises(AirflowException):
            op.execute(None)

        # No ``run_name`` was given, so the payload is expected to carry the
        # task id under that key.
        expected = databricks_operator._deep_string_coerce({
            'new_cluster': NEW_CLUSTER,
            'notebook_task': NOTEBOOK_TASK,
            'run_name': TASK_ID,
        })
        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)
        db_mock.submit_run.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        self.assertEqual(RUN_ID, op.run_id)
Beispiel #5
0
    def test_exec_success(self, db_mock_class):
        """
        Test the execute function in case where the run is successful.

        The mocked ``get_run_state`` reports a ``TERMINATED``/``SUCCESS``
        state, so ``execute`` is expected to return without raising.
        Afterwards the calls recorded on the mock and the operator's
        ``run_id`` are verified.

        :param db_mock_class: mock injected into the test (presumably the
            patched Databricks hook class -- the decorator is not part of
            this snippet).
        """
        run = {
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS
        }
        op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
        db_mock = db_mock_class.return_value
        # Stub the run id with the very constant the assertions below check,
        # rather than repeating its value as a bare literal.
        db_mock.run_now.return_value = RUN_ID
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS',
                                                      '')

        op.execute(None)

        # ``job_id`` was passed as a named parameter and must show up in the
        # payload handed to ``run_now``.
        expected = databricks_operator._deep_string_coerce({
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS,
            'job_id': JOB_ID,
        })

        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)
        db_mock.run_now.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        assert RUN_ID == op.run_id
Beispiel #6
0
    def test_exec_success(self, db_mock_class):
        """
        Test the execute function in case where the run is successful.

        The mocked ``get_run_state`` reports a ``TERMINATED``/``SUCCESS``
        state, so ``execute`` is expected to return without raising.
        Afterwards the calls recorded on the mock and the operator's
        ``run_id`` are verified.

        :param db_mock_class: mock injected into the test (presumably the
            patched Databricks hook class -- the decorator is not part of
            this snippet).
        """
        run = {
            'new_cluster': NEW_CLUSTER,
            'notebook_task': NOTEBOOK_TASK,
        }
        op = DatabricksSubmitRunOperator(task_id=TASK_ID, json=run)
        db_mock = db_mock_class.return_value
        # Stub the run id with the very constant the assertions below check,
        # rather than repeating its value as a bare literal.
        db_mock.submit_run.return_value = RUN_ID
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS',
                                                      '')

        op.execute(None)

        # No ``run_name`` was given, so the payload is expected to carry the
        # task id under that key.
        expected = databricks_operator._deep_string_coerce({
            'new_cluster': NEW_CLUSTER,
            'notebook_task': NOTEBOOK_TASK,
            'run_name': TASK_ID
        })
        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)

        db_mock.submit_run.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        assert RUN_ID == op.run_id
Beispiel #7
0
    def test_init_with_named_parameters(self):
        """
        Test the initializer with the named parameters.

        Only ``job_id`` is supplied, so ``op.json`` is expected to hold just
        that key after ``_deep_string_coerce``.
        """
        op = DatabricksRunNowOperator(job_id=JOB_ID, task_id=TASK_ID)
        # Compare against the constant that was actually passed in, not a
        # hard-coded copy of its value, so both sides cannot drift apart.
        expected = databricks_operator._deep_string_coerce({'job_id': JOB_ID})

        self.assertDictEqual(expected, op.json)
Beispiel #8
0
 def test_init_with_specified_run_name(self):
     """
     Test the initializer with a specified run_name.

     The ``run_name`` supplied inside ``json`` is expected to be kept
     as-is in ``op.json``.
     """
     payload = {
         'new_cluster': NEW_CLUSTER,
         'notebook_task': NOTEBOOK_TASK,
         'run_name': RUN_NAME,
     }
     operator = DatabricksSubmitRunOperator(task_id=TASK_ID, json=payload)

     uncoerced = {
         'new_cluster': NEW_CLUSTER,
         'notebook_task': NOTEBOOK_TASK,
         'run_name': RUN_NAME,
     }
     wanted = databricks_operator._deep_string_coerce(uncoerced)
     self.assertDictEqual(wanted, operator.json)
Beispiel #9
0
    def test_init_with_spark_python_task_named_parameters(self):
        """
        Test the initializer with the named parameters.

        ``new_cluster`` and ``spark_python_task`` are passed as keyword
        arguments; ``op.json`` is expected to contain both plus a
        ``run_name`` equal to the task id.
        """
        operator = DatabricksSubmitRunOperator(
            task_id=TASK_ID,
            new_cluster=NEW_CLUSTER,
            spark_python_task=SPARK_PYTHON_TASK,
        )

        uncoerced = {
            'new_cluster': NEW_CLUSTER,
            'spark_python_task': SPARK_PYTHON_TASK,
            'run_name': TASK_ID,
        }
        wanted = databricks_operator._deep_string_coerce(uncoerced)

        self.assertDictEqual(wanted, operator.json)
Beispiel #10
0
 def test_init_with_json(self):
     """
     Test the initializer with json data.

     ``json`` carries no ``run_name``; ``op.json`` is expected to contain
     one equal to the task id next to the supplied keys.
     """
     payload = dict(new_cluster=NEW_CLUSTER, notebook_task=NOTEBOOK_TASK)
     operator = DatabricksSubmitRunOperator(task_id=TASK_ID, json=payload)

     uncoerced = dict(
         new_cluster=NEW_CLUSTER,
         notebook_task=NOTEBOOK_TASK,
         run_name=TASK_ID,
     )
     wanted = databricks_operator._deep_string_coerce(uncoerced)
     assert wanted == operator.json
Beispiel #11
0
    def test_init_with_named_parameters(self):
        """
        Test the initializer with the named parameters.

        ``new_cluster`` and ``notebook_task`` are passed as keyword
        arguments; ``op.json`` is expected to contain both plus a
        ``run_name`` equal to the task id.
        """
        operator = DatabricksSubmitRunOperator(
            task_id=TASK_ID,
            new_cluster=NEW_CLUSTER,
            notebook_task=NOTEBOOK_TASK,
        )

        uncoerced = dict(
            new_cluster=NEW_CLUSTER,
            notebook_task=NOTEBOOK_TASK,
            run_name=TASK_ID,
        )
        wanted = databricks_operator._deep_string_coerce(uncoerced)

        self.assertDictEqual(wanted, operator.json)
Beispiel #12
0
 def test_init_with_templating(self):
     """
     Test that templated values inside ``json`` get rendered.

     The operator is attached to a DAG, its template fields are rendered
     with ``ds`` set to ``DATE``, and ``op.json`` is then compared with
     the rendered notebook task.
     """
     test_dag = DAG('test', start_date=datetime.now())
     payload = dict(
         new_cluster=NEW_CLUSTER,
         notebook_task=TEMPLATED_NOTEBOOK_TASK,
     )
     operator = DatabricksSubmitRunOperator(
         dag=test_dag, task_id=TASK_ID, json=payload
     )

     render_context = {'ds': DATE}
     operator.render_template_fields(context=render_context)

     uncoerced = dict(
         new_cluster=NEW_CLUSTER,
         notebook_task=RENDERED_TEMPLATED_NOTEBOOK_TASK,
         run_name=TASK_ID,
     )
     wanted = databricks_operator._deep_string_coerce(uncoerced)
     self.assertDictEqual(wanted, operator.json)
Beispiel #13
0
    def test_init_with_templating(self):
        """
        Test that templated values inside ``json`` get rendered.

        The operator is attached to a DAG, its template fields are rendered
        with ``ds`` set to ``DATE``, and ``op.json`` is then compared with
        the rendered jar parameters.
        """
        payload = dict(
            notebook_params=NOTEBOOK_PARAMS,
            jar_params=TEMPLATED_JAR_PARAMS,
        )

        test_dag = DAG('test', start_date=datetime.now())
        operator = DatabricksRunNowOperator(
            dag=test_dag,
            task_id=TASK_ID,
            job_id=JOB_ID,
            json=payload,
        )
        render_context = {'ds': DATE}
        operator.render_template_fields(context=render_context)

        uncoerced = dict(
            notebook_params=NOTEBOOK_PARAMS,
            jar_params=RENDERED_TEMPLATED_JAR_PARAMS,
            job_id=JOB_ID,
        )
        wanted = databricks_operator._deep_string_coerce(uncoerced)
        self.assertDictEqual(wanted, operator.json)
Beispiel #14
0
    def test_deep_string_coerce(self):
        """
        Test ``_deep_string_coerce`` on a mixed, nested structure.

        Numbers are expected to come back as strings, also inside nested
        containers, and the tuple is expected to come back as a list.
        """
        raw = dict(
            test_int=1,
            test_float=1.0,
            test_dict={'key': 'value'},
            test_list=[1, 1.0, 'a', 'b'],
            test_tuple=(1, 1.0, 'a', 'b'),
        )

        wanted = dict(
            test_int='1',
            test_float='1.0',
            test_dict={'key': 'value'},
            test_list=['1', '1.0', 'a', 'b'],
            test_tuple=['1', '1.0', 'a', 'b'],
        )

        coerced = databricks_operator._deep_string_coerce(raw)
        self.assertDictEqual(coerced, wanted)
Beispiel #15
0
 def test_init_with_merging(self):
     """
     Test the initializer when json and other named parameters are both
     provided. The named parameters should override top level keys in the
     json dict.

     Here ``new_cluster`` appears in both places, so the named value is
     the one expected in ``op.json``.
     """
     cluster_override = {'workers': 999}
     base_json = dict(new_cluster=NEW_CLUSTER, notebook_task=NOTEBOOK_TASK)

     operator = DatabricksSubmitRunOperator(
         task_id=TASK_ID,
         json=base_json,
         new_cluster=cluster_override,
     )

     merged = dict(
         new_cluster=cluster_override,   # named parameter wins
         notebook_task=NOTEBOOK_TASK,    # only present in json
         run_name=TASK_ID,
     )
     wanted = databricks_operator._deep_string_coerce(merged)
     self.assertDictEqual(wanted, operator.json)