def test_exec_failure(self, db_mock_class):
    """
    Test the execute function in case where the run failed.

    ``db_mock_class`` is the mocked hook class supplied to the test
    (presumably by a ``mock.patch`` decorator outside this view -- confirm).
    """
    run = {
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
    db_mock = db_mock_class.return_value
    # Stub the run id with the same constant the assertions below check,
    # rather than a bare literal that has to be kept in sync by hand.
    db_mock.run_now.return_value = RUN_ID
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED', '')

    # With the stubbed ('TERMINATED', 'FAILED', '') state, execute must raise.
    with self.assertRaises(AirflowException):
        op.execute(None)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
        'job_id': JOB_ID,
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay,
    )
    db_mock.run_now.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    self.assertEqual(RUN_ID, op.run_id)
def test_init_with_json(self):
    """
    Check the initializer when everything, including ``job_id``, is passed via ``json``.

    The operator's ``json`` attribute is expected to equal the string-coerced input.
    """
    payload = dict(
        notebook_params=NOTEBOOK_PARAMS,
        jar_params=JAR_PARAMS,
        python_params=PYTHON_PARAMS,
        spark_submit_params=SPARK_SUBMIT_PARAMS,
        job_id=JOB_ID,
    )
    operator = DatabricksRunNowOperator(task_id=TASK_ID, json=payload)

    uncoerced = dict(
        notebook_params=NOTEBOOK_PARAMS,
        jar_params=JAR_PARAMS,
        python_params=PYTHON_PARAMS,
        spark_submit_params=SPARK_SUBMIT_PARAMS,
        job_id=JOB_ID,
    )
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_init_with_merging(self):
    """
    Check the initializer when ``json`` and named parameters are combined.

    A named parameter is expected to replace the same top-level key of the
    json dict (here ``notebook_params``).
    """
    notebook_params_override = {'workers': 999}
    base_payload = {
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': JAR_PARAMS,
    }
    operator = DatabricksRunNowOperator(
        task_id=TASK_ID,
        json=base_payload,
        job_id=JOB_ID,
        notebook_params=notebook_params_override,
        python_params=PYTHON_PARAMS,
        spark_submit_params=SPARK_SUBMIT_PARAMS,
    )

    merged = {
        'notebook_params': notebook_params_override,
        'jar_params': JAR_PARAMS,
        'python_params': PYTHON_PARAMS,
        'spark_submit_params': SPARK_SUBMIT_PARAMS,
        'job_id': JOB_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(merged)
    self.assertDictEqual(expected_json, operator.json)
def test_exec_failure(self, db_mock_class):
    """
    Test the execute function in case where the run failed.

    ``db_mock_class`` is the mocked hook class supplied to the test
    (presumably by a ``mock.patch`` decorator outside this view -- confirm).
    """
    run = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
    }
    op = DatabricksSubmitRunOperator(task_id=TASK_ID, json=run)
    db_mock = db_mock_class.return_value
    # Stub the run id with the same constant the assertions below check,
    # rather than a bare literal that has to be kept in sync by hand.
    db_mock.submit_run.return_value = RUN_ID
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED', '')

    # With the stubbed ('TERMINATED', 'FAILED', '') state, execute must raise.
    with self.assertRaises(AirflowException):
        op.execute(None)

    # No run_name was given, so the submitted payload is expected to carry TASK_ID.
    expected = databricks_operator._deep_string_coerce({
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': TASK_ID,
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay,
    )
    db_mock.submit_run.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    self.assertEqual(RUN_ID, op.run_id)
def test_exec_success(self, db_mock_class):
    """
    Test the execute function in case where the run is successful.

    ``db_mock_class`` is the mocked hook class supplied to the test
    (presumably by a ``mock.patch`` decorator outside this view -- confirm).
    """
    run = {
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
    db_mock = db_mock_class.return_value
    # Stub the run id with the same constant the assertions below check,
    # rather than a bare literal that has to be kept in sync by hand.
    db_mock.run_now.return_value = RUN_ID
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS', '')

    op.execute(None)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
        'job_id': JOB_ID,
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay,
    )
    db_mock.run_now.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    assert RUN_ID == op.run_id
def test_exec_success(self, db_mock_class):
    """
    Test the execute function in case where the run is successful.

    ``db_mock_class`` is the mocked hook class supplied to the test
    (presumably by a ``mock.patch`` decorator outside this view -- confirm).
    """
    run = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
    }
    op = DatabricksSubmitRunOperator(task_id=TASK_ID, json=run)
    db_mock = db_mock_class.return_value
    # Stub the run id with the same constant the assertions below check,
    # rather than a bare literal that has to be kept in sync by hand.
    db_mock.submit_run.return_value = RUN_ID
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS', '')

    op.execute(None)

    # No run_name was given, so the submitted payload is expected to carry TASK_ID.
    expected = databricks_operator._deep_string_coerce({
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': TASK_ID,
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay,
    )
    db_mock.submit_run.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    assert RUN_ID == op.run_id
def test_init_with_named_parameters(self):
    """
    Test the initializer with the named parameters.

    Only ``job_id`` is supplied, so ``op.json`` is expected to hold just the
    string-coerced job id.
    """
    op = DatabricksRunNowOperator(job_id=JOB_ID, task_id=TASK_ID)
    # Build the expectation from JOB_ID, the value actually passed in,
    # instead of repeating its numeric value as a literal.
    expected = databricks_operator._deep_string_coerce({'job_id': JOB_ID})
    self.assertDictEqual(expected, op.json)
def test_init_with_specified_run_name(self):
    """
    Check the initializer when the json payload already carries a ``run_name``.

    The supplied RUN_NAME is expected to be kept in ``op.json``.
    """
    payload = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': RUN_NAME,
    }
    operator = DatabricksSubmitRunOperator(task_id=TASK_ID, json=payload)

    uncoerced = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': RUN_NAME,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_init_with_spark_python_task_named_parameters(self):
    """
    Check the initializer when ``new_cluster`` and ``spark_python_task`` are named arguments.

    ``op.json`` is expected to contain both values plus ``run_name`` set to TASK_ID.
    """
    operator = DatabricksSubmitRunOperator(
        task_id=TASK_ID,
        new_cluster=NEW_CLUSTER,
        spark_python_task=SPARK_PYTHON_TASK,
    )

    uncoerced = {
        'new_cluster': NEW_CLUSTER,
        'spark_python_task': SPARK_PYTHON_TASK,
        'run_name': TASK_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_init_with_json(self):
    """
    Check the initializer when the run is described only through ``json``.

    ``op.json`` is expected to be the string-coerced input plus ``run_name``
    set to TASK_ID.
    """
    payload = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
    }
    operator = DatabricksSubmitRunOperator(task_id=TASK_ID, json=payload)

    uncoerced = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': TASK_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    assert expected_json == operator.json
def test_init_with_named_parameters(self):
    """
    Check the initializer when ``new_cluster`` and ``notebook_task`` are named arguments.

    ``op.json`` is expected to contain both values plus ``run_name`` set to TASK_ID.
    """
    operator = DatabricksSubmitRunOperator(
        task_id=TASK_ID,
        new_cluster=NEW_CLUSTER,
        notebook_task=NOTEBOOK_TASK,
    )

    uncoerced = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': TASK_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_init_with_templating(self):
    """
    Check that template fields inside ``json`` are rendered.

    After ``render_template_fields`` with ``ds`` set to DATE, the templated
    notebook task is expected to equal RENDERED_TEMPLATED_NOTEBOOK_TASK.
    """
    payload = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': TEMPLATED_NOTEBOOK_TASK,
    }
    test_dag = DAG('test', start_date=datetime.now())
    operator = DatabricksSubmitRunOperator(dag=test_dag, task_id=TASK_ID, json=payload)
    operator.render_template_fields(context={'ds': DATE})

    uncoerced = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': RENDERED_TEMPLATED_NOTEBOOK_TASK,
        'run_name': TASK_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_init_with_templating(self):
    """
    Check that template fields inside ``json`` are rendered.

    After ``render_template_fields`` with ``ds`` set to DATE, the templated
    jar params are expected to equal RENDERED_TEMPLATED_JAR_PARAMS.
    """
    payload = {
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': TEMPLATED_JAR_PARAMS,
    }
    test_dag = DAG('test', start_date=datetime.now())
    operator = DatabricksRunNowOperator(
        dag=test_dag,
        task_id=TASK_ID,
        job_id=JOB_ID,
        json=payload,
    )
    operator.render_template_fields(context={'ds': DATE})

    uncoerced = {
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': RENDERED_TEMPLATED_JAR_PARAMS,
        'job_id': JOB_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(uncoerced)
    self.assertDictEqual(expected_json, operator.json)
def test_deep_string_coerce(self):
    """
    Check ``_deep_string_coerce`` on a dict mixing scalars and containers.

    The expectation encodes: ints and floats become strings, strings and
    nested dicts of strings are unchanged, list elements are coerced
    individually, and a tuple comes back as a list of coerced elements.
    """
    raw_input = dict(
        test_int=1,
        test_float=1.0,
        test_dict={'key': 'value'},
        test_list=[1, 1.0, 'a', 'b'],
        test_tuple=(1, 1.0, 'a', 'b'),
    )
    coerced_expectation = dict(
        test_int='1',
        test_float='1.0',
        test_dict={'key': 'value'},
        test_list=['1', '1.0', 'a', 'b'],
        test_tuple=['1', '1.0', 'a', 'b'],
    )
    actual = databricks_operator._deep_string_coerce(raw_input)
    self.assertDictEqual(actual, coerced_expectation)
def test_init_with_merging(self):
    """
    Check the initializer when ``json`` and named parameters are combined.

    A named parameter is expected to replace the same top-level key of the
    json dict (here ``new_cluster``).
    """
    new_cluster_override = {'workers': 999}
    base_payload = {
        'new_cluster': NEW_CLUSTER,
        'notebook_task': NOTEBOOK_TASK,
    }
    operator = DatabricksSubmitRunOperator(
        task_id=TASK_ID,
        json=base_payload,
        new_cluster=new_cluster_override,
    )

    merged = {
        'new_cluster': new_cluster_override,
        'notebook_task': NOTEBOOK_TASK,
        'run_name': TASK_ID,
    }
    expected_json = databricks_operator._deep_string_coerce(merged)
    self.assertDictEqual(expected_json, operator.json)