def test_init_with_bad_type(self):
    json = {'test': datetime.now()}
    # Looks a bit weird since we have to escape regex reserved symbols.
    exception_message = r'Type \<(type|class) \'datetime.datetime\'\> used ' + \
                        r'for parameter json\[test\] is not a number or a string'
    with self.assertRaisesRegex(AirflowException, exception_message):
        DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=json)
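The tests in this section assume a set of module-level imports and fixtures that sit above the test class in the original file. A minimal sketch of what they might look like: RUN_ID = 1 and JOB_ID = 42 follow from assertions made in these tests, while the remaining values are illustrative placeholders, not taken from this document.

from datetime import datetime
from unittest import mock

from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.providers.databricks.hooks.databricks import RunState
# _deep_string_coerce lives in the provider's operator module in the releases
# these tests appear to target; the exact module path may differ in yours.
from airflow.providers.databricks.operators import databricks as databricks_operator
from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

# Illustrative fixture values.
TASK_ID = 'run-now-operator'
DEFAULT_CONN_ID = 'databricks_default'
JOB_ID = 42
RUN_ID = 1
DATE = '2017-04-20'
NOTEBOOK_TASK = {'notebook_path': '/test'}
NOTEBOOK_PARAMS = {'dry-run': 'true', 'oldest-time-to-consider': '1457570074236'}
JAR_PARAMS = ['param1', 'param2']
TEMPLATED_JAR_PARAMS = ['{{ ds }}']
RENDERED_TEMPLATED_JAR_PARAMS = [DATE]
PYTHON_PARAMS = ['john doe', '35']
SPARK_SUBMIT_PARAMS = ['--class', 'org.apache.spark.examples.SparkPi']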
# db_mock_class is injected by patching DatabricksHook; the patch target below
# assumes the hook is referenced from the provider's operator module and may
# need adjusting for your Airflow version.
@mock.patch('airflow.providers.databricks.operators.databricks.DatabricksHook')
def test_exec_failure(self, db_mock_class):
    """
    Test the execute function in case where the run failed.
    """
    run = {
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
    db_mock = db_mock_class.return_value
    db_mock.run_now.return_value = 1
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED', '')

    with self.assertRaises(AirflowException):
        op.execute(None)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
        'job_id': JOB_ID
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay)
    db_mock.run_now.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    self.assertEqual(RUN_ID, op.run_id)
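For context on the mocks above: execute() calls run_now, stores the returned run id, then polls get_run_state until the run reaches a terminal state, raising AirflowException when that state is not successful, which is exactly what the TERMINATED/FAILED stub triggers. A rough sketch of that polling logic in a hypothetical helper, assuming the provider's RunState exposes is_terminal and is_successful; this is not the provider's verbatim code:

import time

from airflow.exceptions import AirflowException


def _poll_run_until_terminal(operator, hook):
    # Hypothetical sketch of the operator's monitoring loop: fetch the run
    # page URL once, then poll until Databricks reports a terminal state.
    operator.run_page_url = hook.get_run_page_url(operator.run_id)
    while True:
        run_state = hook.get_run_state(operator.run_id)
        if run_state.is_terminal:
            if run_state.is_successful:
                return
            raise AirflowException(f'Run {operator.run_id} failed: {run_state}')
        time.sleep(operator.polling_period_seconds)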
def test_init_with_json(self):
    """
    Test the initializer with json data.
    """
    json = {
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': JAR_PARAMS,
        'python_params': PYTHON_PARAMS,
        'spark_submit_params': SPARK_SUBMIT_PARAMS,
        'job_id': JOB_ID
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, json=json)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': JAR_PARAMS,
        'python_params': PYTHON_PARAMS,
        'spark_submit_params': SPARK_SUBMIT_PARAMS,
        'job_id': JOB_ID
    })

    self.assertDictEqual(expected, op.json)
def test_init_with_merging(self):
    """
    Test the initializer when json and other named parameters are both
    provided. The named parameters should override top-level keys in the
    json dict.
    """
    override_notebook_params = {'workers': 999}
    json = {'notebook_params': NOTEBOOK_PARAMS, 'jar_params': JAR_PARAMS}

    op = DatabricksRunNowOperator(task_id=TASK_ID,
                                  json=json,
                                  job_id=JOB_ID,
                                  notebook_params=override_notebook_params,
                                  python_params=PYTHON_PARAMS,
                                  spark_submit_params=SPARK_SUBMIT_PARAMS)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': override_notebook_params,
        'jar_params': JAR_PARAMS,
        'python_params': PYTHON_PARAMS,
        'spark_submit_params': SPARK_SUBMIT_PARAMS,
        'job_id': JOB_ID
    })

    self.assertDictEqual(expected, op.json)
@mock.patch('airflow.providers.databricks.operators.databricks.DatabricksHook')
def test_exec_success(self, db_mock_class):
    """
    Test the execute function in case where the run is successful.
    """
    run = {
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
    db_mock = db_mock_class.return_value
    db_mock.run_now.return_value = 1
    db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS', '')

    op.execute(None)

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS,
        'job_id': JOB_ID,
    })
    db_mock_class.assert_called_once_with(
        DEFAULT_CONN_ID,
        retry_limit=op.databricks_retry_limit,
        retry_delay=op.databricks_retry_delay)
    db_mock.run_now.assert_called_once_with(expected)
    db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
    db_mock.get_run_state.assert_called_once_with(RUN_ID)
    self.assertEqual(RUN_ID, op.run_id)
def test_init_with_named_parameters(self):
    """
    Test the initializer with the named parameters.
    """
    op = DatabricksRunNowOperator(job_id=JOB_ID, task_id=TASK_ID)
    expected = databricks_operator._deep_string_coerce({'job_id': 42})

    self.assertDictEqual(expected, op.json)
@mock.patch('airflow.providers.databricks.operators.databricks.DatabricksHook')
def test_on_kill(self, db_mock_class):
    # on_kill should cancel the active Databricks run.
    run = {
        'notebook_params': NOTEBOOK_PARAMS,
        'notebook_task': NOTEBOOK_TASK,
        'jar_params': JAR_PARAMS
    }
    op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
    db_mock = db_mock_class.return_value
    op.run_id = RUN_ID

    op.on_kill()

    db_mock.cancel_run.assert_called_once_with(RUN_ID)
def test_init_with_templating(self):
    json = {'notebook_params': NOTEBOOK_PARAMS, 'jar_params': TEMPLATED_JAR_PARAMS}
    dag = DAG('test', start_date=datetime.now())
    op = DatabricksRunNowOperator(dag=dag, task_id=TASK_ID, job_id=JOB_ID, json=json)
    op.render_template_fields(context={'ds': DATE})

    expected = databricks_operator._deep_string_coerce({
        'notebook_params': NOTEBOOK_PARAMS,
        'jar_params': RENDERED_TEMPLATED_JAR_PARAMS,
        'job_id': JOB_ID,
    })

    self.assertDictEqual(expected, op.json)
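Every expected payload in these tests passes through databricks_operator._deep_string_coerce before comparison. The helper recursively coerces numeric leaves to strings (the Databricks API expects string parameters), and its fallthrough case produces the error message asserted in test_init_with_bad_type. A simplified sketch of that behavior, not the provider's verbatim source:

from airflow.exceptions import AirflowException


def _deep_string_coerce(content, json_path='json'):
    # Simplified sketch: strings pass through, numbers become strings,
    # lists and dicts are coerced recursively, anything else raises.
    coerce = _deep_string_coerce
    if isinstance(content, str):
        return content
    if isinstance(content, (int, float)):
        return str(content)
    if isinstance(content, (list, tuple)):
        return [coerce(e, f'{json_path}[{i}]') for i, e in enumerate(content)]
    if isinstance(content, dict):
        return {k: coerce(v, f'{json_path}[{k}]') for k, v in content.items()}
    raise AirflowException(
        f'Type {type(content)} used for parameter {json_path} '
        'is not a number or a string')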
from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.databricks.operators.databricks import (
    DatabricksRunNowOperator,
    DatabricksSubmitRunOperator,
)
from airflow.providers.postgres.operators.postgres import PostgresOperator

with DAG(
    'orchestration_good_practices',
    start_date=datetime(2021, 1, 1),
    schedule_interval='@daily',
    catchup=False,
    default_args={
        'owner': 'airflow',
        'email_on_failure': False,
        'retries': 1,
        'retry_delay': timedelta(minutes=1)
    },
) as dag:

    opr_refresh_mat_view = PostgresOperator(
        task_id='refresh_mat_view',
        postgres_conn_id='postgres_default',
        sql='REFRESH MATERIALIZED VIEW example_view;',
    )

    opr_submit_run = DatabricksSubmitRunOperator(
        task_id='submit_run',
        databricks_conn_id='databricks',
        new_cluster=new_cluster,
        notebook_task=notebook_task,
    )

    opr_run_now = DatabricksRunNowOperator(
        task_id='run_now',
        databricks_conn_id='databricks',
        job_id=5,
        notebook_params=notebook_params,
    )

    opr_refresh_mat_view >> opr_submit_run >> opr_run_now
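Both this DAG and the next reference new_cluster, notebook_task, and notebook_params without defining them; they are presumably declared earlier in the original files. A plausible sketch, with all values illustrative rather than taken from this document:

# Illustrative fixtures; the Spark version, node type, worker count, and
# notebook path are assumptions, not values from the original examples.
new_cluster = {
    'spark_version': '7.3.x-scala2.12',
    'node_type_id': 'i3.xlarge',
    'num_workers': 2,
}

notebook_task = {
    'notebook_path': '/Users/example@example.com/example_notebook',
}

# Passed as widget parameters to the notebook backing the existing job.
notebook_params = {
    'Variable': 5,
}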
"depends_on_past": False, "email_on_failure": False, "email_on_retry": False, "retries": 1, "retry_delay": timedelta(minutes=2), } with DAG( "databricks_dag", start_date=datetime(2021, 1, 1), schedule_interval="@daily", catchup=False, default_args=default_args, ) as dag: opr_submit_run = DatabricksSubmitRunOperator( task_id="submit_run", databricks_conn_id="databricks", new_cluster=new_cluster, notebook_task=notebook_task, ) opr_run_now = DatabricksRunNowOperator( task_id="run_now", databricks_conn_id="databricks", job_id=5, notebook_params=notebook_params, ) opr_submit_run >> opr_run_now
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

with DAG('databricks_dieg_dag',
         start_date=datetime(2021, 1, 1),
         schedule_interval='@daily',
         catchup=False,
         default_args=default_args) as dag:

    helloworld1 = DatabricksRunNowOperator(task_id='run_now',
                                           databricks_conn_id='Databricksdieg',
                                           job_id=5)

    helloworld2 = BashOperator(
        task_id='running_bash_bitch',
        bash_command='echo donneeeeeeeeeeeeeeeee',
    )

    helloworld1 >> helloworld2