Example #1
    def test_exec_success(self, db_mock_class):
        """
        Test the execute function in case where the run is successful.
        """
        run = {
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS
        }
        op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
        db_mock = db_mock_class.return_value
        db_mock.run_now.return_value = 1
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'SUCCESS',
                                                      '')

        op.execute(None)

        expected = databricks_operator._deep_string_coerce({
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS,
            'job_id': JOB_ID,
        })

        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)
        db_mock.run_now.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        assert RUN_ID == op.run_id
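The tests in these examples rely on module-level constants and a patched hook class defined elsewhere in the test module. A minimal sketch of those fixtures is shown below; the names match the test code, but the values and the patch target are illustrative, not necessarily the exact ones from the Airflow test suite.

# Illustrative fixtures assumed by the tests above (example values only).
from unittest import mock

TASK_ID = 'databricks-operator'
DEFAULT_CONN_ID = 'databricks_default'
JOB_ID = 42
RUN_ID = 1
NOTEBOOK_PARAMS = {'dry-run': 'true'}
NOTEBOOK_TASK = {'notebook_path': '/test-notebook'}
JAR_PARAMS = ['param1', 'param2']
PYTHON_PARAMS = ['john doe', '35']
SPARK_SUBMIT_PARAMS = ['--class', 'org.apache.spark.examples.SparkPi']
DATE = '2017-04-20'
TEMPLATED_JAR_PARAMS = ['{{ ds }}']
RENDERED_TEMPLATED_JAR_PARAMS = [DATE]

# Each test method is additionally decorated so that `db_mock_class` receives
# the mocked hook class, e.g.:
# @mock.patch('airflow.providers.databricks.operators.databricks.DatabricksHook')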
Example #2
    def test_exec_failure(self, db_mock_class):
        """
        Test the execute function in case where the run failed.
        """
        run = {
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS
        }
        op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
        db_mock = db_mock_class.return_value
        db_mock.run_now.return_value = 1
        db_mock.get_run_state.return_value = RunState('TERMINATED', 'FAILED',
                                                      '')

        with self.assertRaises(AirflowException):
            op.execute(None)

        expected = databricks_operator._deep_string_coerce({
            'notebook_params': NOTEBOOK_PARAMS,
            'notebook_task': NOTEBOOK_TASK,
            'jar_params': JAR_PARAMS,
            'job_id': JOB_ID
        })
        db_mock_class.assert_called_once_with(
            DEFAULT_CONN_ID,
            retry_limit=op.databricks_retry_limit,
            retry_delay=op.databricks_retry_delay)
        db_mock.run_now.assert_called_once_with(expected)
        db_mock.get_run_page_url.assert_called_once_with(RUN_ID)
        db_mock.get_run_state.assert_called_once_with(RUN_ID)
        self.assertEqual(RUN_ID, op.run_id)
Example #3
    def test_on_kill(self, db_mock_class):
        run = {'notebook_params': NOTEBOOK_PARAMS, 'notebook_task': NOTEBOOK_TASK, 'jar_params': JAR_PARAMS}
        op = DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=run)
        db_mock = db_mock_class.return_value
        op.run_id = RUN_ID

        op.on_kill()
        db_mock.cancel_run.assert_called_once_with(RUN_ID)
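The assertion above only checks that cancel_run is invoked with the stored run id. A minimal sketch of the operator behaviour being exercised, simplified rather than the exact provider source, looks roughly like this:

    # Simplified sketch of what test_on_kill verifies (not the exact provider
    # source): on_kill asks the hook to cancel the run whose id was stored by
    # execute().
    def on_kill(self):
        hook = self._get_hook()
        hook.cancel_run(self.run_id)
        self.log.info('Task %s requested cancellation of run_id %s.',
                      self.task_id, self.run_id)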
Example #4
    def test_init_with_templating(self):
        json = {'notebook_params': NOTEBOOK_PARAMS, 'jar_params': TEMPLATED_JAR_PARAMS}

        dag = DAG('test', start_date=datetime.now())
        op = DatabricksRunNowOperator(dag=dag, task_id=TASK_ID, job_id=JOB_ID, json=json)
        op.render_template_fields(context={'ds': DATE})
        expected = databricks_operator._deep_string_coerce(
            {
                'notebook_params': NOTEBOOK_PARAMS,
                'jar_params': RENDERED_TEMPLATED_JAR_PARAMS,
                'job_id': JOB_ID,
            }
        )
        self.assertDictEqual(expected, op.json)
Example #5
    def test_init_with_bad_type(self):
        json = {'test': datetime.now()}
        # Looks a bit weird since we have to escape regex reserved symbols.
        exception_message = r'Type \<(type|class) \'datetime.datetime\'\> used ' + \
                            r'for parameter json\[test\] is not a number or a string'
        with self.assertRaisesRegex(AirflowException, exception_message):
            DatabricksRunNowOperator(task_id=TASK_ID, job_id=JOB_ID, json=json)
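For context, _deep_string_coerce is a private helper in the operator module and is the source of the error message matched above. A rough sketch of the coercion behaviour these tests rely on, simplified relative to the real helper, is:

# Rough sketch (simplified, not the actual provider code): strings pass
# through, numbers are stringified, containers are walked recursively, and
# anything else raises AirflowException with the message matched above.
from airflow.exceptions import AirflowException

def _deep_string_coerce(content, json_path='json'):
    if isinstance(content, str):
        return content
    if isinstance(content, (int, float)):
        return str(content)
    if isinstance(content, (list, tuple)):
        return [_deep_string_coerce(e, f'{json_path}[{i}]')
                for i, e in enumerate(content)]
    if isinstance(content, dict):
        return {k: _deep_string_coerce(v, f'{json_path}[{k}]')
                for k, v in content.items()}
    raise AirflowException(
        f'Type {type(content)} used for parameter {json_path} '
        'is not a number or a string')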
Example #6
    def test_init_with_merging(self):
        """
        Test the initializer when json and other named parameters are both
        provided. The named parameters should override top level keys in the
        json dict.
        """
        override_notebook_params = {'workers': 999}
        json = {'notebook_params': NOTEBOOK_PARAMS, 'jar_params': JAR_PARAMS}

        op = DatabricksRunNowOperator(task_id=TASK_ID,
                                      json=json,
                                      job_id=JOB_ID,
                                      notebook_params=override_notebook_params,
                                      python_params=PYTHON_PARAMS,
                                      spark_submit_params=SPARK_SUBMIT_PARAMS)

        expected = databricks_operator._deep_string_coerce({
            'notebook_params': override_notebook_params,
            'jar_params': JAR_PARAMS,
            'python_params': PYTHON_PARAMS,
            'spark_submit_params': SPARK_SUBMIT_PARAMS,
            'job_id': JOB_ID
        })

        self.assertDictEqual(expected, op.json)
Example #7
    def test_init_with_json(self):
        """
        Test the initializer with json data.
        """
        json = {
            'notebook_params': NOTEBOOK_PARAMS,
            'jar_params': JAR_PARAMS,
            'python_params': PYTHON_PARAMS,
            'spark_submit_params': SPARK_SUBMIT_PARAMS,
            'job_id': JOB_ID
        }
        op = DatabricksRunNowOperator(task_id=TASK_ID, json=json)

        expected = databricks_operator._deep_string_coerce({
            'notebook_params': NOTEBOOK_PARAMS,
            'jar_params': JAR_PARAMS,
            'python_params': PYTHON_PARAMS,
            'spark_submit_params': SPARK_SUBMIT_PARAMS,
            'job_id': JOB_ID
        })

        self.assertDictEqual(expected, op.json)
Example #8
    def test_init_with_named_parameters(self):
        """
        Test the initializer with the named parameters.
        """
        op = DatabricksRunNowOperator(job_id=JOB_ID, task_id=TASK_ID)
        expected = databricks_operator._deep_string_coerce({'job_id': 42})

        self.assertDictEqual(expected, op.json)

from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.databricks.operators.databricks import (
    DatabricksRunNowOperator,
    DatabricksSubmitRunOperator,
)
from airflow.providers.postgres.operators.postgres import PostgresOperator

with DAG(
        'orchestration_good_practices',
        start_date=datetime(2021, 1, 1),
        schedule_interval='@daily',
        catchup=False,
        default_args={
            'owner': 'airflow',
            'email_on_failure': False,
            'retries': 1,
            'retry_delay': timedelta(minutes=1)
        },
) as dag:

    opr_refresh_mat_view = PostgresOperator(
        task_id='refresh_mat_view',
        postgres_conn_id='postgres_default',
        sql='REFRESH MATERIALIZED VIEW example_view;',
    )

    opr_submit_run = DatabricksSubmitRunOperator(
        task_id='submit_run',
        databricks_conn_id='databricks',
        new_cluster=new_cluster,
        notebook_task=notebook_task)
    opr_run_now = DatabricksRunNowOperator(task_id='run_now',
                                           databricks_conn_id='databricks',
                                           job_id=5,
                                           notebook_params=notebook_params)

    opr_refresh_mat_view >> opr_submit_run >> opr_run_now
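The DAG above assumes that new_cluster, notebook_task and notebook_params are defined earlier in the file. A minimal sketch with illustrative values follows; adjust the Spark version, node type and notebook path to your own workspace.

# Illustrative values only; these dicts follow the Databricks Jobs API shape.
new_cluster = {
    'spark_version': '7.3.x-scala2.12',
    'num_workers': 2,
    'node_type_id': 'i3.xlarge',
}
notebook_task = {
    'notebook_path': '/Users/example@example.com/example-notebook',
}
notebook_params = {
    'dry-run': 'true',
}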
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=2),
}

with DAG(
        "databricks_dag",
        start_date=datetime(2021, 1, 1),
        schedule_interval="@daily",
        catchup=False,
        default_args=default_args,
) as dag:

    opr_submit_run = DatabricksSubmitRunOperator(
        task_id="submit_run",
        databricks_conn_id="databricks",
        new_cluster=new_cluster,
        notebook_task=notebook_task,
    )

    opr_run_now = DatabricksRunNowOperator(
        task_id="run_now",
        databricks_conn_id="databricks",
        job_id=5,
        notebook_params=notebook_params,
    )

    opr_submit_run >> opr_run_now
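Both DAGs assume an Airflow connection (here named 'databricks') pointing at the workspace, typically the workspace URL as the host plus a personal access token, and that new_cluster, notebook_task and notebook_params are defined as in the sketch shown earlier. DatabricksSubmitRunOperator submits a one-off run on a fresh cluster, while DatabricksRunNowOperator triggers a job (job_id=5) that already exists in the Databricks workspace.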
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator, DatabricksRunNowOperator
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2)
}

with DAG('databricks_dieg_dag',
         start_date=datetime(2021, 1, 1),
         schedule_interval='@daily',
         catchup=False,
         default_args=default_args) as dag:
    helloworld1 = DatabricksRunNowOperator(task_id='run_now',
                                           databricks_conn_id='Databricksdieg',
                                           job_id=5)

    helloworld2 = BashOperator(
        task_id='running_bash_bitch',
        bash_command='echo donneeeeeeeeeeeeeeeee',
    )

    helloworld1 >> helloworld2