Example #1
    def test_python_callable_keyword_arguments_are_templatized(self):
        """Test PythonSensor op_kwargs are templatized"""
        recorded_calls = []

        task = PythonSensor(
            task_id='python_sensor',
            timeout=0.01,
            poke_interval=0.3,
            # a Mock instance cannot be used as the callable, or the test fails with
            # "TypeError: Object of type Mock is not JSON serializable"
            python_callable=build_recording_function(recorded_calls),
            op_kwargs={
                'an_int': 4,
                'a_date': date(2019, 1, 1),
                'a_templated_string': "dag {{dag.dag_id}} ran on {{ds}}."
            },
            dag=self.dag)

        self.dag.create_dagrun(run_id='manual__' + DEFAULT_DATE.isoformat(),
                               execution_date=DEFAULT_DATE,
                               start_date=DEFAULT_DATE,
                               state=State.RUNNING)
        with self.assertRaises(AirflowSensorTimeout):
            task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        # two calls expected: the first at start, the second just before the timeout
        self.assertEqual(2, len(recorded_calls))
        self._assert_calls_equal(
            recorded_calls[0],
            Call(an_int=4,
                 a_date=date(2019, 1, 1),
                 a_templated_string="dag {} ran on {}.".format(
                     self.dag.dag_id,
                     DEFAULT_DATE.date().isoformat())))
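
This test (and Example #6 below) relies on two helpers, build_recording_function and Call, that are defined elsewhere in the test module and not shown here. A minimal sketch of what they might look like, inferred from how the tests use them; the actual definitions in the source module may differ:

    class Call:
        """Records the positional and keyword arguments of one invocation."""
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

    def build_recording_function(calls_collection):
        # A plain function is used instead of a Mock so the rendered
        # op_kwargs stay JSON serializable (see the comment in the test).
        def recording_function(*args, **kwargs):
            calls_collection.append(Call(*args, **kwargs))
            return False  # stay "not done" so the sensor times out as expected
        return recording_function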
Example #2
 def test_python_sensor_true(self):
     t = PythonSensor(task_id='python_sensor_check_true',
                      python_callable=lambda: True,
                      dag=self.dag)
     t.run(start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           ignore_ti_state=True)
Example #3
 def test_python_sensor_raise(self):
     t = PythonSensor(
         task_id='python_sensor_check_raise',
         python_callable=lambda: 1 / 0,
         dag=self.dag)
     with self.assertRaises(ZeroDivisionError):
         t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #5
 def test_python_sensor_false(self):
     t = PythonSensor(
         task_id='python_sensor_check_false',
         timeout=1,
         python_callable=lambda: False,
         dag=self.dag)
     with self.assertRaises(AirflowSensorTimeout):
         t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #6
    def test_python_callable_arguments_are_templatized(self):
        """Test PythonSensor op_args are templatized"""
        recorded_calls = []

        # Create a named tuple and ensure it is still preserved
        # after the rendering is done
        Named = namedtuple('Named', ['var1', 'var2'])
        named_tuple = Named('{{ ds }}', 'unchanged')

        task = PythonSensor(
            task_id='python_sensor',
            timeout=0.01,
            poke_interval=0.3,
            # a Mock instance cannot be used as the callable, or the test fails with
            # "TypeError: Object of type Mock is not JSON serializable"
            python_callable=build_recording_function(recorded_calls),
            op_args=[
                4,
                date(2019, 1, 1), "dag {{dag.dag_id}} ran on {{ds}}.",
                named_tuple
            ],
            dag=self.dag)

        self.dag.create_dagrun(run_id='manual__' + DEFAULT_DATE.isoformat(),
                               execution_date=DEFAULT_DATE,
                               start_date=DEFAULT_DATE,
                               state=State.RUNNING)
        with self.assertRaises(AirflowSensorTimeout):
            task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        ds_templated = DEFAULT_DATE.date().isoformat()
        # two calls expected: the first at start, the second just before the timeout
        self.assertEqual(2, len(recorded_calls))
        self._assert_calls_equal(
            recorded_calls[0],
            Call(4, date(2019, 1, 1),
                 "dag {} ran on {}.".format(self.dag.dag_id, ds_templated),
                 Named(ds_templated, 'unchanged')))
Example #7
        data=json.dumps({
            "ExperimentName": "Face_detection_Haar_cascade_pipeline_REST",
            "RunSource": "SDK",
            "ParameterAssignments": {
                "sample_num": "1"
            }
        }),
        log_response=True,
        xcom_push=True)

    wait_face_detection_pipeline = PythonSensor(
        task_id='sense_face_detection_pipeline_end',
        poke_interval=10,
        timeout=60 * 10,  # 10 minutes
        python_callable=wait_till_pipeline_end,
        op_kwargs={
            'experiment_name': 'Face_detection_Haar_cascade_pipeline_REST',
            'task_xcom': "{{ ti.xcom_pull(task_ids='face_detection_haar_cascade', key='return_value') }}"
        })

    blur_face = SimpleHttpOperator(
        task_id='face_blurring',
        endpoint=face_blurring_pipeline_endpoint,
        http_conn_id='azure_pipelines_http_endpoint',
        method='POST',
        headers={
            'Authorization': 'Bearer ' + pipeline_token,
            'Content-Type': 'application/json'
        },
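
The poke callable wait_till_pipeline_end is not included in this fragment. A hypothetical sketch of such a callable, assuming the upstream SimpleHttpOperator pushed a JSON response to XCom that contains a URL for polling the run status; the field names and status values here are assumptions, not the project's actual contract:

    import json

    import requests

    def wait_till_pipeline_end(experiment_name, task_xcom):
        # task_xcom is the templated XCom string pushed by the upstream HTTP task
        run_info = json.loads(task_xcom)
        response = requests.get(run_info["RunUrl"])  # "RunUrl" is an assumed field
        response.raise_for_status()
        status = response.json().get("Status")  # "Status" is an assumed field
        print(f"{experiment_name} run status: {status}")
        return status in ("Completed", "Finished")  # assumed terminal states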
Example #8
    def test_reschedule_handling(self, mock_pool_full):
        """
        Test that task reschedules are handled properly
        """
        # Mock the pool with a pool with slots open since the pool doesn't actually exist
        mock_pool_full.return_value = False

        # Return values of the python sensor callable, modified during tests
        done = False
        fail = False

        def sensor_callable():
            if fail:
                raise AirflowException()
            return done

        dag = models.DAG(dag_id='test_reschedule_handling')
        task = PythonSensor(task_id='test_reschedule_handling_sensor',
                            poke_interval=0,
                            mode='reschedule',
                            python_callable=sensor_callable,
                            retries=1,
                            retry_delay=datetime.timedelta(seconds=0),
                            dag=dag,
                            owner='airflow',
                            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))

        ti = TI(task=task, execution_date=timezone.utcnow())
        self.assertEqual(ti._try_number, 0)
        self.assertEqual(ti.try_number, 1)

        def run_ti_and_assert(run_date, expected_start_date, expected_end_date,
                              expected_duration, expected_state,
                              expected_try_number,
                              expected_task_reschedule_count):
            with freeze_time(run_date):
                try:
                    ti.run()
                except AirflowException:
                    if not fail:
                        raise
            ti.refresh_from_db()
            self.assertEqual(ti.state, expected_state)
            self.assertEqual(ti._try_number, expected_try_number)
            self.assertEqual(ti.try_number, expected_try_number + 1)
            self.assertEqual(ti.start_date, expected_start_date)
            self.assertEqual(ti.end_date, expected_end_date)
            self.assertEqual(ti.duration, expected_duration)
            trs = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(trs), expected_task_reschedule_count)

        date1 = timezone.utcnow()
        date2 = date1 + datetime.timedelta(minutes=1)
        date3 = date2 + datetime.timedelta(minutes=1)
        date4 = date3 + datetime.timedelta(minutes=1)

        # Run with multiple reschedules.
        # During reschedule the try number remains the same, but each reschedule is recorded.
        # The start date is expected to remain the initial date, hence the duration increases.
        # When finished the try number is incremented and there is no reschedule expected
        # for this try.

        done, fail = False, False
        run_ti_and_assert(date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 0, 1)

        done, fail = False, False
        run_ti_and_assert(date2, date1, date2, 60, State.UP_FOR_RESCHEDULE, 0, 2)

        done, fail = False, False
        run_ti_and_assert(date3, date1, date3, 120, State.UP_FOR_RESCHEDULE, 0, 3)

        done, fail = True, False
        run_ti_and_assert(date4, date1, date4, 180, State.SUCCESS, 1, 0)

        # Clear the task instance.
        dag.clear()
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.NONE)
        self.assertEqual(ti._try_number, 1)

        # Run again after clearing with reschedules and a retry.
        # The retry increments the try number, and for that try no reschedule is expected.
        # After the retry the start date is reset, hence the duration is also reset.

        done, fail = False, False
        run_ti_and_assert(date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 1, 1)

        done, fail = False, True
        run_ti_and_assert(date2, date1, date2, 60, State.UP_FOR_RETRY, 2, 0)

        done, fail = False, False
        run_ti_and_assert(date3, date3, date3, 0, State.UP_FOR_RESCHEDULE, 2, 1)

        done, fail = True, False
        run_ti_and_assert(date4, date3, date4, 60, State.SUCCESS, 3, 0)
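
The test above is a fragment; for reference, a plausible set of imports it depends on, using the Airflow 1.10-era module paths these examples target (the @mock.patch decorator that supplies mock_pool_full is also not shown):

    import datetime

    from freezegun import freeze_time

    from airflow import models
    from airflow.contrib.sensors.python_sensor import PythonSensor
    from airflow.exceptions import AirflowException
    from airflow.models import TaskInstance as TI, TaskReschedule
    from airflow.utils import timezone
    from airflow.utils.state import State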
Example #9
    schedule_interval="0 16 * * *",
    description="A batch workflow for ingesting supermarket promotions data, demonstrating the PythonSensor.",
    default_args={"depends_on_past": True},
)

create_metrics = DummyOperator(task_id="create_metrics", dag=dag)


def _wait_for_supermarket(supermarket_id_):
    supermarket_path = Path("/opt/airflow/data/" + supermarket_id_)
    data_files = supermarket_path.glob("data-*.csv")
    success_file = supermarket_path / "_SUCCESS"
    # glob() returns a lazy iterator that is always truthy; any() checks for actual files
    return any(data_files) and success_file.exists()


for supermarket_id in range(1, 5):
    wait = PythonSensor(
        task_id=f"wait_for_supermarket_{supermarket_id}",
        python_callable=_wait_for_supermarket,
        op_kwargs={"supermarket_id_": f"supermarket{supermarket_id}"},
        timeout=600,
        mode="reschedule",
        dag=dag,
    )
    copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}",
                         dag=dag)
    process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}",
                            dag=dag)
    wait >> copy >> process >> create_metrics
Example #10
    default_args={"depends_on_past": True},
)


def _wait_for_supermarket(supermarket_id_):
    supermarket_path = Path("/data/" + supermarket_id_)
    data_files = supermarket_path.glob("data-*.csv")
    success_file = supermarket_path / "_SUCCESS"
    # glob() returns a lazy iterator that is always truthy; any() checks for actual files
    return any(data_files) and success_file.exists()


for supermarket_id in [1, 2, 3, 4]:
    wait = PythonSensor(
        task_id=f"wait_for_supermarket_{supermarket_id}",
        python_callable=_wait_for_supermarket,
        op_kwargs={"supermarket_id": f"supermarket{supermarket_id}"},
        provide_context=True,
        dag=dag,
    )
    copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}", dag=dag)
    process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}", dag=dag)
    generate_metrics = DummyOperator(
        task_id=f"generate_metrics_supermarket_{supermarket_id}", dag=dag
    )
    compute_differences = DummyOperator(
        task_id=f"compute_differences_supermarket_{supermarket_id}", dag=dag
    )
    update_dashboard = DummyOperator(
        task_id=f"update_dashboard_supermarket_{supermarket_id}", dag=dag
    )
    notify_new_data = DummyOperator(
Example #11
from pathlib import Path

import airflow.utils.dates
from airflow import DAG
from airflow.contrib.sensors.python_sensor import PythonSensor

dag = DAG(
    dag_id="06_listing_6_2",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 16 * * *",
    description="A batch workflow for ingesting supermarket promotions data.",
    default_args={"depends_on_past": True},
)


def _wait_for_supermarket(supermarket_id_):
    supermarket_path = Path("/opt/airflow/data/" + supermarket_id_)
    data_files = supermarket_path.glob("data-*.csv")
    success_file = supermarket_path / "_SUCCESS"
    # glob() returns a lazy iterator that is always truthy; any() checks for actual files
    return any(data_files) and success_file.exists()


wait_for_supermarket_1 = PythonSensor(
    task_id="wait_for_supermarket_1",
    python_callable=_wait_for_supermarket,
    op_kwargs={"supermarket_id": "supermarket1"},
    dag=dag,
)
Example #12
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval=None,
)


def _wait_for_supermarket(supermarket_id_):
    supermarket_path = Path("/data/" + supermarket_id_)
    data_files = supermarket_path.glob("data-*.csv")
    success_file = supermarket_path / "_SUCCESS"
    # glob() returns a lazy iterator that is always truthy; any() checks for actual files
    return any(data_files) and success_file.exists()


for supermarket_id in range(1, 5):
    wait = PythonSensor(
        task_id=f"wait_for_supermarket_{supermarket_id}",
        python_callable=_wait_for_supermarket,
        op_kwargs={"supermarket_id_": f"supermarket{supermarket_id}"},
        dag=dag1,
    )
    copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}",
                         dag=dag1)
    process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}",
                            dag=dag1)
    trigger_create_metrics_dag = TriggerDagRunOperator(
        task_id=f"trigger_create_metrics_dag_supermarket_{supermarket_id}",
        trigger_dag_id="create_metrics",
        dag=dag1,
    )
    wait >> copy >> process >> trigger_create_metrics_dag

compute_differences = DummyOperator(task_id="compute_differences", dag=dag2)
update_dashboard = DummyOperator(task_id="update_dashboard", dag=dag2)
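
This fragment references dag1 and dag2 without showing their definitions; only the tail of one DAG(...) call survives at the top. A minimal sketch of what the two objects might look like, with dag_id values assumed except for "create_metrics", which must match trigger_dag_id above:

    import airflow.utils.dates
    from airflow import DAG

    dag1 = DAG(
        dag_id="ingest_supermarket_data",  # assumed id for the ingestion DAG
        start_date=airflow.utils.dates.days_ago(3),
        schedule_interval=None,
    )
    dag2 = DAG(
        dag_id="create_metrics",  # must match trigger_dag_id in the operator above
        start_date=airflow.utils.dates.days_ago(3),
        schedule_interval=None,  # runs only when triggered by dag1
    )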
Example #13
    )

    unzip = BashOperator(
        task_id='unzip',
        bash_command='gunzip {{ ti.xcom_pull(task_ids="download") }}',
    )

    extract_release = PythonOperator(
        task_id='extract_release',
        python_callable=extract_release_events,
        op_kwargs={'gz_file': '{{ ti.xcom_pull(task_ids="download") }}'},
    )

    check_release_present = PythonSensor(
        task_id="check_release_present",
        python_callable=check_if_file_is_empty,
        provide_context=True,
        op_kwargs={'task_id_to_get_full_path': 'extract_release'},
        poke_interval=30,
    )

    send_releases = PythonOperator(
        task_id="send_release_to_slack",
        python_callable=send_release_to_slack,
        op_kwargs={
            'release_file': '{{ ti.xcom_pull(task_ids="extract_release") }}'
        },
    )

    sensor >> get_gh_archive >> unzip >> extract_release >> check_release_present >> send_releases
Example #14
 def test_python_sensor_true(self):
     t = PythonSensor(
         task_id='python_sensor_check_true',
         python_callable=lambda: True,
         dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #15
default_args = {
    "owner": "godatadriven",
    "start_date": airflow.utils.dates.days_ago(14)
}

dag = DAG(
    dag_id="b_pythonsensor",
    default_args=default_args,
    schedule_interval="0 0 * * *",
    description="Example PythonSensor",
)


def _time_for_coffee():
    """I drink coffee between 6 and 12"""
    return 6 <= datetime.now().hour < 12


time_for_coffee = PythonSensor(
    task_id="time_for_coffee",
    python_callable=_time_for_coffee,
    mode="reschedule",
    dag=dag,
)

make_coffee = BashOperator(task_id="make_coffee",
                           bash_command="echo 'Time for coffee!'",
                           dag=dag)

time_for_coffee >> make_coffee