Example no. 1
    def test_external_task_sensor(self):
        self.test_time_sensor()
        op = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)
Example no. 2
    def test_external_task_sensor_delta(self):
        self.test_time_sensor()
        op = ExternalTaskSensor(
            task_id='test_external_task_sensor_check_delta',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_delta=timedelta(0),
            allowed_states=['success'],
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)
Example no. 3
    def test_external_task_sensor_waits_for_dag_check_existence(self):
        op = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id="non-existing-dag",
            external_task_id=None,
            check_existence=True,
            dag=self.dag,
        )

        with self.assertRaises(AirflowException):
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   ignore_ti_state=True)
Example no. 4
    def test_external_task_sensor_waits_for_task_check_existence(self):
        op = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id="example_bash_operator",
            external_task_id="non-existing-task",
            check_existence=True,
            dag=self.dag,
        )

        with pytest.raises(AirflowException):
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   ignore_ti_state=True)
Example no. 5
    def test_external_task_sensor_fn(self):
        self.test_time_sensor()
        # check that the execution_date_fn works
        op1 = ExternalTaskSensor(
            task_id='test_external_task_sensor_check_delta_1',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=lambda dt: dt + timedelta(0),
            allowed_states=['success'],
            dag=self.dag,
        )
        op1.run(start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_ti_state=True)
        # double check that the execution_date_fn is actually used by
        # pointing it at a date with no run, which must time the sensor out
        op2 = ExternalTaskSensor(
            task_id='test_external_task_sensor_check_delta_2',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=lambda dt: dt + timedelta(days=1),
            allowed_states=['success'],
            timeout=1,
            poke_interval=1,
            dag=self.dag,
        )
        with self.assertRaises(exceptions.AirflowSensorTimeout):
            op2.run(start_date=DEFAULT_DATE,
                    end_date=DEFAULT_DATE,
                    ignore_ti_state=True)
Example no. 6
def task_sensor():
    """
        TODO 샘플 예제 잘 작동 안함..

        DAG가 동일 파일에 두개 있으면, DAG두개 생성됨
        각각의 dag가 다른 dag를 참조 할수 있음
    """
    with makeDag("task_marker_test") as parent_dag:
        parent_task = ExternalTaskMarker(
            task_id="parent_task",
            external_dag_id="task_sensor_test",
            external_task_id="child_task1",
        )

    with makeDag("task_sensor_test") as child_dag:
        child_task1 = ExternalTaskSensor(
            task_id="child_task1",
            external_dag_id=parent_dag.dag_id,
            external_task_id=parent_task.task_id,
            timeout=600,
            allowed_states=['success'],
            failed_states=['failed', 'skipped'],
            mode="reschedule",
        )

        child_task2 = DummyOperator(task_id="child_task2")
        child_task1 >> child_task2
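
The makeDag helper above is not part of Airflow; a minimal sketch of what it presumably wraps, where every default below is an assumption:

# Hypothetical helper assumed by the example above: a thin wrapper that
# builds each DAG with shared defaults.
from datetime import datetime

from airflow import DAG


def makeDag(dag_id):
    return DAG(dag_id, start_date=datetime(2021, 1, 1), schedule_interval="@daily")
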
Example no. 7
    def test_external_task_sensor_failed_states_as_success(self):
        self.test_time_sensor()
        op = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            allowed_states=["failed"],
            failed_states=["success"],
            dag=self.dag,
        )
        with pytest.raises(AirflowException) as ctx:
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   ignore_ti_state=True)
        assert str(ctx.value) == \
            "The external task time_sensor_check in DAG unit_test_dag failed."
Example no. 8
    def test_catch_invalid_allowed_states(self):
        with self.assertRaises(ValueError):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check_1',
                external_dag_id=TEST_DAG_ID,
                external_task_id=TEST_TASK_ID,
                allowed_states=['invalid_state'],
                dag=self.dag,
            )

        with self.assertRaises(ValueError):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check_2',
                external_dag_id=TEST_DAG_ID,
                external_task_id=None,
                allowed_states=['invalid_state'],
                dag=self.dag,
            )
Example no. 9
    def test_external_task_sensor_wrong_failed_states(self):
        with self.assertRaises(ValueError):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check',
                external_dag_id=TEST_DAG_ID,
                external_task_id=TEST_TASK_ID,
                failed_states=["invalid_state"],
                dag=self.dag,
            )
Example no. 10
    def test_external_dag_sensor(self):
        other_dag = DAG('other_dag',
                        default_args=self.args,
                        end_date=DEFAULT_DATE,
                        schedule_interval='@once')
        other_dag.create_dagrun(run_id='test',
                                start_date=DEFAULT_DATE,
                                execution_date=DEFAULT_DATE,
                                state=State.SUCCESS)
        op = ExternalTaskSensor(
            task_id='test_external_dag_sensor_check',
            external_dag_id='other_dag',
            external_task_id=None,
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)
Example no. 11
    def test_external_task_sensor_fn_multiple_args(self):
        """Check this task sensor passes multiple args with full context. If no failure, means clean run."""
        self.test_time_sensor()

        def my_func(dt, context):
            assert context['execution_date'] == dt
            return dt + timedelta(0)

        op1 = ExternalTaskSensor(
            task_id='test_external_task_sensor_multiple_arg_fn',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=my_func,
            allowed_states=['success'],
            dag=self.dag,
        )
        op1.run(start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_ti_state=True)
Example no. 12
    def test_catch_overlap_allowed_failed_state(self):
        with self.assertRaises(AirflowException):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check',
                external_dag_id=TEST_DAG_ID,
                external_task_id=TEST_TASK_ID,
                allowed_states=[State.SUCCESS],
                failed_states=[State.SUCCESS],
                dag=self.dag,
            )
Example no. 13
    def test_templated_sensor(self):
        with self.dag:
            sensor = ExternalTaskSensor(task_id='templated_task',
                                        external_dag_id='dag_{{ ds }}',
                                        external_task_id='task_{{ ds }}')

        instance = TaskInstance(sensor, DEFAULT_DATE)
        instance.render_templates()

        assert sensor.external_dag_id == f"dag_{DEFAULT_DATE.date()}"
        assert sensor.external_task_id == f"task_{DEFAULT_DATE.date()}"
Example no. 14
    def test_external_task_sensor_fn_kwargs(self):
        """Check this task sensor passes multiple args with full context. If no failure, means clean run."""
        self.test_time_sensor()

        def my_func(dt, ds_nodash, tomorrow_ds_nodash):
            assert ds_nodash == dt.strftime("%Y%m%d")
            assert tomorrow_ds_nodash == (dt +
                                          timedelta(days=1)).strftime("%Y%m%d")
            return dt + timedelta(0)

        op1 = ExternalTaskSensor(
            task_id='test_external_task_sensor_fn_kwargs',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=my_func,
            allowed_states=['success'],
            dag=self.dag,
        )
        op1.run(start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_ti_state=True)
Example no. 15
    def test_external_task_sensor_error_delta_and_fn(self):
        self.test_time_sensor()
        # Test that providing execution_delta and a function raises an error
        with self.assertRaises(ValueError):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check_delta',
                external_dag_id=TEST_DAG_ID,
                external_task_id=TEST_TASK_ID,
                execution_delta=timedelta(0),
                execution_date_fn=lambda dt: dt,
                allowed_states=['success'],
                dag=self.dag,
            )
Example no. 16
    def _get_external_task_sensor(self, from_task_id: str, to_task_id: str) -> ExternalTaskSensor:
        from_pipeline_name = self._task_graph.get_node(from_task_id).obj.pipeline_name
        from_task_name = self._task_graph.get_node(from_task_id).obj.name

        from_pipeline_schedule = self._task_graph.get_node(from_task_id).obj.pipeline.schedule
        to_pipeline_schedule = self._task_graph.get_node(to_task_id).obj.pipeline.schedule

        return ExternalTaskSensor(
            task_id=f"{from_pipeline_name}-{from_task_name}-sensor",
            external_dag_id=from_pipeline_name,
            external_task_id=from_task_name,
            execution_date_fn=self._get_execution_date_fn(from_pipeline_schedule, to_pipeline_schedule),
            mode=conf.EXTERNAL_SENSOR_MODE,
            poke_interval=conf.EXTERNAL_SENSOR_POKE_INTERVAL,
            timeout=conf.EXTERNAL_SENSOR_TIMEOUT
        )
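
_get_execution_date_fn is project code the snippet does not show; a plausible sketch, assuming it uses croniter to map the sensing DAG's execution date onto the upstream DAG's most recent schedule tick (names and behavior are assumptions, not the project's actual implementation):

from datetime import datetime

from croniter import croniter


def _get_execution_date_fn(self, from_schedule, to_schedule):
    # Assumed behavior: given the upstream and downstream cron schedules,
    # return a callable mapping this DAG's execution_date to the latest
    # upstream execution_date before it. to_schedule is unused in this
    # simplified sketch.
    def execution_date_fn(execution_date):
        return croniter(from_schedule, execution_date).get_prev(datetime)

    return execution_date_fn
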
Example no. 17
def dag_bag_cyclic():
    """
    Create a DagBag with DAGs having cyclic dependencies set up by ExternalTaskMarker and
    ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                  ^          |
                  |          |
    dag_1:        |          ---> task_a_1 >> task_b_1
                  |                               |
                  ---------------------------------

    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(task_id="task_b_0",
                                  external_dag_id="dag_1",
                                  external_task_id="task_a_1",
                                  recursion_depth=3,
                                  dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(task_id="task_a_1",
                                  external_dag_id=dag_0.dag_id,
                                  external_task_id=task_b_0.task_id,
                                  dag=dag_1)
    task_b_1 = ExternalTaskMarker(task_id="task_b_1",
                                  external_dag_id="dag_0",
                                  external_task_id="task_a_0",
                                  recursion_depth=2,
                                  dag=dag_1)
    task_a_1 >> task_b_1

    for dag in [dag_0, dag_1]:
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag
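
A hedged sketch, not taken from the original test suite, of how the cycle above can be confirmed by following the ExternalTaskMarker links through the DagBag:

from airflow.sensors.external_task import ExternalTaskMarker


def walk_markers(dag_bag, dag_id, task_id, seen=None):
    # Follow markers across DAGs and plain edges within a DAG; report
    # True as soon as a (dag_id, task_id) node repeats, i.e. a cycle.
    seen = seen if seen is not None else set()
    if (dag_id, task_id) in seen:
        return True
    seen.add((dag_id, task_id))
    task = dag_bag.get_dag(dag_id).get_task(task_id)
    if isinstance(task, ExternalTaskMarker):
        return walk_markers(dag_bag, task.external_dag_id,
                            task.external_task_id, seen)
    return any(walk_markers(dag_bag, dag_id, t, seen)
               for t in task.downstream_task_ids)


assert walk_markers(dag_bag_cyclic(), "dag_0", "task_a_0")
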
Example no. 18
@pytest.fixture
def dag_bag_head_tail():
    """
    Create a DagBag containing one DAG, with task "head" depending on task "tail" of the
    previous execution_date.

    20200501     20200502                 20200510
    +------+     +------+                 +------+
    | head |    -->head |    -->         -->head |
    |  |   |   / |  |   |   /           / |  |   |
    |  v   |  /  |  v   |  /           /  |  v   |
    | body | /   | body | /     ...   /   | body |
    |  |   |/    |  |   |/           /    |  |   |
    |  v   /     |  v   /           /     |  v   |
    | tail/|     | tail/|          /      | tail |
    +------+     +------+                 +------+
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    with DAG("head_tail", start_date=DEFAULT_DATE,
             schedule_interval="@daily") as dag:
        head = ExternalTaskSensor(
            task_id='head',
            external_dag_id=dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body = DummyOperator(task_id="body")
        tail = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=dag.dag_id,
            external_task_id=head.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head >> body >> tail

    dag_bag.bag_dag(dag=dag, root_dag=dag)

    yield dag_bag
Example no. 19
def create_sub_dag(parent_dag, sub_dag_name, start_date, schedule_interval):
    with DAG(dag_id=f'{parent_dag}.{sub_dag_name}',
             start_date=start_date,
             schedule_interval=schedule_interval) as dag:
        # senses if external dag has started
        task_sensor = ExternalTaskSensor(task_id='task_sensor',
                                         external_dag_id=external_dag,
                                         external_task_id=None,
                                         poke_interval=15)
        # prints results
        print_results = PythonOperator(task_id='print_results',
                                       python_callable=print_res,
                                       op_args=[external_task, external_dag])
        # removes file
        remove_file = BashOperator(task_id='remove_file',
                                   bash_command=f'rm -f {path}')
        # creates a file with appropriate timestamp
        create_timestamp = BashOperator(
            task_id='create_timestamp',
            bash_command='touch ~/timestamp_{{ ts_nodash }}',
        )
        task_sensor >> print_results >> remove_file >> create_timestamp
    return dag
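
The snippet references several module-level names that are not shown (external_dag, external_task, print_res, path); hypothetical stand-ins so the sketch is self-contained, with every value an assumption:

# Hypothetical module-level names assumed by create_sub_dag above.
external_dag = 'producer_dag'     # DAG whose run the sensor waits for
external_task = 'producer_task'   # task name passed through for printing
path = '~/timestamp_*'            # file(s) cleaned up by remove_file


def print_res(task, dag):
    # Assumed callable: report which external task/DAG was sensed.
    print(f"external task '{task}' of DAG '{dag}' completed")
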
Example no. 20
HOST_DATA_DIR = os.environ["HOST_DATA_DIR"]
DATA_RAW_PATH = "/data/raw/{{ ds }}"
DATA_SPLIT_PATH = "/data/split/{{ ds }}"
DATA_TRANSFORMED_PATH = "/data/transformed/{{ ds }}"
MODEL_PATH = "/data/model/{{ ds }}"

with DAG(
    "train_pipeline",
    default_args=default_args,
    schedule_interval="@weekly",
    start_date=days_ago(30),
) as dag:
    data_sensor = ExternalTaskSensor(
        task_id="data-sensor",
        external_dag_id="download",
        external_task_id="download",
        check_existence=True,
        timeout=30,
    )

    split = DockerOperator(
        image="airflow-split",
        command=f"-l {DATA_RAW_PATH} -s {DATA_SPLIT_PATH}",
        network_mode="bridge",
        task_id="split",
        do_xcom_push=False,
        auto_remove=True,
        volumes=[f"{HOST_DATA_DIR}:/data"],
    )

    fit_transformer = DockerOperator(
Example no. 21
          schedule_interval='0 12 * * 1')

pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password']

tasks_to_wait_for = [
    'fact_hospital_transaction_clarity', 'dim_hospital_account_clarity',
    'dim_guarantor_clarity', 'bridge_hospital_account_coverage_clarity',
    'dim_payor_plan_clarity', 'dim_patient_clarity'
]

tasks = []
for t in tasks_to_wait_for:
    task = ExternalTaskSensor(external_dag_id='run_master_etl',
                              external_task_id='exec_ebi_{}_logic'.format(t),
                              execution_delta=timedelta(days=-6,
                                                        hours=7,
                                                        minutes=20),
                              task_id='wait_for_{}'.format(t),
                              dag=dag)
    tasks.append(task)

path = 'C:\\Airflow\\send_bmt'
ebi_db_server_prod = Variable.get('ebi_db_server_prod')
airflow_server_prod = Variable.get('airflow_server_prod')

# -S server, -d database
# -E trusted connection, -i input file
# -o output file, -s, use comma to separate fields
# -W trim white space, -X security measure for automated envs
query_cmd = (f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E '
             f'-i {path}\\bmt_query.sql '
Example no. 22
    # 'start_date': datetime.datetime(2021, 8, 2, 0, 0),
    'email': '*****@*****.**',
    'email_on_failure': False,
    'email_on_retry': False,
    # 'schedule_interval': '@once',
    'retries': 0,
    'retry_delay': timedelta(minutes=30),
}
with DAG(dag_id=DAG_NAME,
         default_args=args,
         start_date=datetime.datetime(2021, 8, 5, 20, 0),
         schedule_interval='0 13,14,15,16,17,18,19,20,21,22,23,0,1 * * *',
         tags=['HOM', 'Movimientos', 'Cuentas']) as dag:
    sensor_cuenta_trn = ExternalTaskSensor(
        task_id='sensor_trn_cuenta',
        external_dag_id='dag-sii-bch-ing-ab-trn-cue-mov',
        external_task_id='trn_cuenta',
    )
    sensor_movimientos_trn = ExternalTaskSensor(
        task_id='sensor_trn_movimientos',
        external_dag_id='dag-sii-bch-ing-ab-trn-cue-mov',
        external_task_id='trn_movimientos',
    )
    start = DummyOperator(task_id='start', )
    hom_cuenta = SubDagOperator(
        task_id='hom_cuenta',
        subdag=subdag(
            DAG_NAME, 'hom_cuenta', args,
            'gs://yas-sii-int-des-dev/AB/config/PAR_SII_BCH_ELT_AB_TRN_HOM_CUENTA.json'
        ),
    )
Example no. 23
    'retry_delay': timedelta(minutes=5)
}
dag = DAG(
    'load_data_warehouse',
    default_args=default_args,
    description='Load Data Warehouse',
    schedule_interval='@daily',
    start_date=days_ago(1),
    tags=['dw'],
    is_paused_upon_creation=False
)

wait_for_init = ExternalTaskSensor(
    task_id='wait_for_init',
    external_dag_id='initialize_etl_environment',
    execution_date_fn=lambda x: datetime(2021, 1, 1, 0, 0, 0, 0, pytz.UTC),
    timeout=1,
    dag=dag
)


wait_for_oltp = ExternalTaskSensor(
    task_id='wait_for_oltp',
    external_dag_id='import_main_data',
    execution_date_fn=lambda x: get_most_recent_dag_run('import_main_data').execution_date,
    timeout=120,
    dag=dag
)

wait_for_flat_files = ExternalTaskSensor(
    task_id='wait_for_flat_files',
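
get_most_recent_dag_run is a user-defined helper that the snippet is cut off before showing; a common minimal sketch, assuming it returns the latest DagRun for a dag_id:

from airflow.models import DagRun


def get_most_recent_dag_run(dag_id):
    # Assumed helper: newest DagRun for the given dag_id, or None.
    dag_runs = DagRun.find(dag_id=dag_id)
    dag_runs.sort(key=lambda run: run.execution_date, reverse=True)
    return dag_runs[0] if dag_runs else None
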
Example no. 24
import datetime

import airflow.utils.dates
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.sensors.external_task import ExternalTaskSensor

dag1 = DAG(
    dag_id="figure_6_20_dag_1",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 16 * * *",
)
dag2 = DAG(
    dag_id="figure_6_20_dag_2",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 18 * * *",
)

DummyOperator(task_id="copy_to_raw", dag=dag1) >> DummyOperator(
    task_id="process_supermarket", dag=dag1)

wait = ExternalTaskSensor(
    task_id="wait_for_process_supermarket",
    external_dag_id="figure_6_20_dag_1",
    external_task_id="process_supermarket",
    execution_delta=datetime.timedelta(hours=6),
    dag=dag2,
)
report = DummyOperator(task_id="report", dag=dag2)
wait >> report
Example no. 25
    # 'start_date': datetime.datetime(2021, 8, 2, 0, 0),
    'email': '*****@*****.**',
    'email_on_failure': False,
    'email_on_retry': False,
    # 'schedule_interval': '@once',
    'retries': 0,
    'retry_delay': timedelta(minutes=30),
}
with DAG(dag_id=DAG_NAME,
         default_args=args,
         start_date=datetime.datetime(2021, 8, 5, 18, 0),
         schedule_interval='0 13,14,15,16,17,18,19,20,21,22,23,0,1 * * *',
         tags=['TRN', 'Movimientos', 'Cuentas']) as dag:
    sensor_cuenta_raw = ExternalTaskSensor(
        task_id='sensor_raw_cuenta',
        external_dag_id='dag-sii-bch-ing-ab-raw-cue-mov',
        external_task_id='sii-bch-ing-ab-raw-cuenta',
    )
    sensor_movimientos_raw = ExternalTaskSensor(
        task_id='sensor_raw_movimientos',
        external_dag_id='dag-sii-bch-ing-ab-raw-cue-mov',
        external_task_id='sii-bch-ing-ab-raw-movimientos',
    )
    start = DummyOperator(task_id='start', )
    trn_cuenta = SubDagOperator(
        task_id='trn_cuenta',
        subdag=subdag(
            DAG_NAME, 'trn_cuenta', args,
            'gs://yas-sii-int-des-dev/AB/config/PAR_SII_BCH_ELT_AB_TRN_HOM_CUENTA.json'
        ),
    )
Example no. 26
def sub_dag_processing():
    """ SubDAG: 
    external_sensor (waiting for DB_1 update)  ->
    print_log (pulling xcom with count for rows from DB_1 and print it) ->
    delete_file (run file in TRIGGER_DIR) -> 
    print_status (print task_instance details)
    +
    near example of TaskGroup
    """
    sub_dag = DAG(
        dag_id=f"{DAG_ID}.{SUB_DAG_ID}", 
        default_args=DEFAULT_ARGS,
        schedule_interval=CONFIGS[DAG_ID]["schedule_interval"],
        start_date=CONFIGS[DAG_ID]["start_date"],
        tags=["example"]
    )

    with sub_dag:

        @task()
        def print_logs():
            context = get_current_context()
            for db in DBS:
                msg = context["ti"].xcom_pull(
                    task_ids="query", dag_id=f"dag_id_{db}",
                    key=f"{db}_rows_count", include_prior_dates=True,
                )
                print(f"the pulled message is: {msg}")


        def create_section():
            """
            Create tasks in the outer section.
            There is broken link in the course, so I copypasted example from gridU
            """
            dummies = [DummyOperator(task_id=f'task-{i + 1}') for i in range(5)]

            with TaskGroup("inside_section_1") as inside_section_1:
                _ = [DummyOperator(task_id=f'task-{i + 1}',) for i in range(3)]

            with TaskGroup("inside_section_2") as inside_section_2:
                _ = [DummyOperator(task_id=f'task-{i + 1}',) for i in range(3)]

            dummies[-1] >> inside_section_1
            dummies[-2] >> inside_section_2

        ext_sensor = ExternalTaskSensor(
            task_id="waiting_for_DB_1_update",
            external_dag_id=DAG_ID,
            external_task_id="trigger_database_update",
            # execution_delta=timedelta(minutes=5)
        )
        
        task_print_logs = print_logs()

        task_remove_file = BashOperator(task_id="delete_run_file", bash_command=f"rm {TRIGGER_DIR}")

        task_finished = BashOperator(task_id="finish_op", bash_command="echo {{ts_nodash}} ")

        ext_sensor >> task_print_logs >> task_remove_file >> task_finished 


        start = DummyOperator(task_id="start")
        with TaskGroup("section_1", tooltip="Tasks for Section 1") as section_1:
            create_section()

        some_other_task = DummyOperator(task_id="some-other-task")
        with TaskGroup("section_2", tooltip="Tasks for Section 2") as section_2:
            create_section()

        end = DummyOperator(task_id='end')
        start >> section_1 >> some_other_task >> section_2 >> end

    return sub_dag
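
The sub-DAG above leans on module-level configuration that is not shown; hypothetical placeholders matching the names it references:

# Hypothetical module-level configuration assumed by sub_dag_processing.
from datetime import datetime

DAG_ID = "main_dag"
SUB_DAG_ID = "sub_dag"
DEFAULT_ARGS = {"owner": "airflow"}
DBS = ["DB_1", "DB_2"]
TRIGGER_DIR = "/tmp/run_file"
CONFIGS = {DAG_ID: {"schedule_interval": "@daily",
                    "start_date": datetime(2021, 1, 1)}}
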
Example no. 27
    def test_external_task_sensor_fn_multiple_execution_dates(self):
        bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
        dag_external_id = TEST_DAG_ID + '_external'
        dag_external = DAG(dag_external_id,
                           default_args=self.args,
                           schedule_interval=timedelta(seconds=1))
        task_external_with_failure = BashOperator(
            task_id="task_external_with_failure",
            bash_command=bash_command_code,
            retries=0,
            dag=dag_external)
        task_external_without_failure = DummyOperator(
            task_id="task_external_without_failure",
            retries=0,
            dag=dag_external)

        task_external_without_failure.run(start_date=DEFAULT_DATE,
                                          end_date=DEFAULT_DATE +
                                          timedelta(seconds=1),
                                          ignore_ti_state=True)

        session = settings.Session()
        TI = TaskInstance
        try:
            task_external_with_failure.run(start_date=DEFAULT_DATE,
                                           end_date=DEFAULT_DATE +
                                           timedelta(seconds=1),
                                           ignore_ti_state=True)
            # The task_external_with_failure task is expected to fail
            # once per minute (on the run for the first second of
            # each minute).
        except Exception as e:  # pylint: disable=broad-except
            failed_tis = (session.query(TI).filter(
                TI.dag_id == dag_external_id,
                TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE + timedelta(seconds=1),
            ).all())
            if len(failed_tis) == 1 and failed_tis[
                    0].task_id == 'task_external_with_failure':
                pass
            else:
                raise e

        dag_id = TEST_DAG_ID
        dag = DAG(dag_id,
                  default_args=self.args,
                  schedule_interval=timedelta(minutes=1))
        task_without_failure = ExternalTaskSensor(
            task_id='task_without_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_without_failure',
            execution_date_fn=lambda dt:
            [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag,
        )
        task_with_failure = ExternalTaskSensor(
            task_id='task_with_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_with_failure',
            execution_date_fn=lambda dt:
            [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag,
        )

        task_without_failure.run(start_date=DEFAULT_DATE,
                                 end_date=DEFAULT_DATE,
                                 ignore_ti_state=True)

        with self.assertRaises(AirflowSensorTimeout):
            task_with_failure.run(start_date=DEFAULT_DATE,
                                  end_date=DEFAULT_DATE,
                                  ignore_ti_state=True)
Example no. 28
with DAG(dag_id='external_task_marker_parent',
         start_date=start_date,
         schedule_interval=None,
         tags=['tms_practice']) as parent_dag:

    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="external_task_marker_child",
        external_task_id="child_task1",
    )

    with DAG(
            dag_id="external_task_marker_child",
            start_date=start_date,
            schedule_interval=None,
            tags=['tms_practice'],
    ) as child_dag:

        child_task1 = ExternalTaskSensor(
            task_id="child_task1",
            external_dag_id=parent_dag.dag_id,
            external_task_id=parent_task.task_id,
            timeout=600,
            allowed_states=['success'],
            failed_states=['failed', 'skipped'],
            mode='reschedule',
        )

        child_task2 = DummyOperator(task_id='child_task2')
        child_task1 >> child_task2
Example no. 29
def dag_bag_ext():
    """
    Create a DagBag with DAGs looking like this. The dotted lines represent external dependencies
    set up using ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(task_id="task_b_0",
                                  external_dag_id="dag_1",
                                  external_task_id="task_a_1",
                                  recursion_depth=3,
                                  dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(task_id="task_a_1",
                                  external_dag_id=dag_0.dag_id,
                                  external_task_id=task_b_0.task_id,
                                  dag=dag_1)
    task_b_1 = ExternalTaskMarker(task_id="task_b_1",
                                  external_dag_id="dag_2",
                                  external_task_id="task_a_2",
                                  recursion_depth=2,
                                  dag=dag_1)
    task_a_1 >> task_b_1

    dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_2 = ExternalTaskSensor(task_id="task_a_2",
                                  external_dag_id=dag_1.dag_id,
                                  external_task_id=task_b_1.task_id,
                                  dag=dag_2)
    task_b_2 = ExternalTaskMarker(task_id="task_b_2",
                                  external_dag_id="dag_3",
                                  external_task_id="task_a_3",
                                  recursion_depth=1,
                                  dag=dag_2)
    task_a_2 >> task_b_2

    dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_3 = ExternalTaskSensor(task_id="task_a_3",
                                  external_dag_id=dag_2.dag_id,
                                  external_task_id=task_b_2.task_id,
                                  dag=dag_3)
    task_b_3 = DummyOperator(task_id="task_b_3", dag=dag_3)
    task_a_3 >> task_b_3

    for dag in [dag_0, dag_1, dag_2, dag_3]:
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag
Example no. 30
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}
dag = DAG('import_main_data',
          default_args=default_args,
          description='Import Main Transactions Files',
          schedule_interval='@daily',
          start_date=days_ago(1),
          is_paused_upon_creation=False)

wait_for_init = ExternalTaskSensor(
    task_id='wait_for_init',
    external_dag_id='initialize_etl_environment',
    execution_date_fn=lambda x: datetime(2021, 1, 1, 0, 0, 0, 0, pytz.UTC),
    timeout=1,
    dag=dag)

import_transactions_task = BashOperator(
    task_id='import_transactions',
    bash_command=
    f"""psql {AIRFLOW_CONN_SALES_OLTP} -c "\copy transactions to stdout" | psql {AIRFLOW_CONN_SALES_DW} -c "\copy import.transactions(transaction_id, customer_id, product_id, amount, qty, channel_id, bought_date)  from stdin" """,
    dag=dag,
)

import_channels_task = BashOperator(
    task_id='import_channels',
    bash_command=
    f"""psql {AIRFLOW_CONN_SALES_OLTP} -c "\copy channels to stdout" | psql {AIRFLOW_CONN_SALES_DW} -c "\copy import.channels(channel_id, channel_name) from stdin" """,
    dag=dag,