def test_external_task_sensor(self):
    self.test_time_sensor()
    op = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        dag=self.dag,
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_delta(self):
    self.test_time_sensor()
    op = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_delta=timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_waits_for_dag_check_existence(self):
    op = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id="non-existing-dag",
        external_task_id=None,
        check_existence=True,
        dag=self.dag,
    )
    with self.assertRaises(AirflowException):
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_waits_for_task_check_existence(self):
    op = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id="example_bash_operator",
        external_task_id="non-existing-task",
        check_existence=True,
        dag=self.dag,
    )
    with pytest.raises(AirflowException):
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_fn(self):
    self.test_time_sensor()
    # check that the execution_fn works
    op1 = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta_1',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    op1.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    # double check that the execution_date_fn is being called by making the test fail
    op2 = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta_2',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(days=1),
        allowed_states=['success'],
        timeout=1,
        poke_interval=1,
        dag=self.dag,
    )
    with self.assertRaises(exceptions.AirflowSensorTimeout):
        op2.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def task_sensor():
    """
    TODO: this sample does not work reliably yet.
    If two DAGs are defined in the same file, two DAGs are created,
    and each DAG can reference the other.
    """
    with makeDag("task_marker_test") as parent_dag:
        parent_task = ExternalTaskMarker(
            task_id="parent_task",
            external_dag_id="task_sensor_test",
            external_task_id="child_task1",
        )

    with makeDag("task_sensor_test") as child_dag:
        child_task1 = ExternalTaskSensor(
            task_id="child_task1",
            external_dag_id=parent_dag.dag_id,
            external_task_id=parent_task.task_id,
            timeout=600,
            allowed_states=['success'],
            failed_states=['failed', 'skipped'],
            mode="reschedule",
        )
        child_task2 = DummyOperator(task_id="child_task2")
        child_task1 >> child_task2

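# Hypothetical sketch of the makeDag helper assumed above; the real helper is not shown
# in the snippet. It presumably just builds a DAG with shared defaults so each DAG can be
# declared in a `with` block (DAG objects are context managers). Start date and schedule
# here are illustrative assumptions, not taken from the source.
from airflow import DAG
from airflow.utils.dates import days_ago


def makeDag(dag_id):
    return DAG(dag_id=dag_id, start_date=days_ago(1), schedule_interval="@daily")
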
def test_external_task_sensor_failed_states_as_success(self):
    self.test_time_sensor()
    op = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        allowed_states=["failed"],
        failed_states=["success"],
        dag=self.dag,
    )
    with pytest.raises(AirflowException) as ctx:
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
    assert str(ctx.value) == "The external task time_sensor_check in DAG unit_test_dag failed."

def test_catch_invalid_allowed_states(self):
    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check_1',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            allowed_states=['invalid_state'],
            dag=self.dag,
        )

    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check_2',
            external_dag_id=TEST_DAG_ID,
            external_task_id=None,
            allowed_states=['invalid_state'],
            dag=self.dag,
        )

def test_external_task_sensor_wrong_failed_states(self):
    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            failed_states=["invalid_state"],
            dag=self.dag,
        )

def test_external_dag_sensor(self):
    other_dag = DAG('other_dag', default_args=self.args, end_date=DEFAULT_DATE, schedule_interval='@once')
    other_dag.create_dagrun(
        run_id='test',
        start_date=DEFAULT_DATE,
        execution_date=DEFAULT_DATE,
        state=State.SUCCESS,
    )
    op = ExternalTaskSensor(
        task_id='test_external_dag_sensor_check',
        external_dag_id='other_dag',
        external_task_id=None,
        dag=self.dag,
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_fn_multiple_args(self):
    """Check that this task sensor passes multiple args with the full context. No failure means a clean run."""
    self.test_time_sensor()

    def my_func(dt, context):
        assert context['execution_date'] == dt
        return dt + timedelta(0)

    op1 = ExternalTaskSensor(
        task_id='test_external_task_sensor_multiple_arg_fn',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=my_func,
        allowed_states=['success'],
        dag=self.dag,
    )
    op1.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_catch_overlap_allowed_failed_state(self):
    with self.assertRaises(AirflowException):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            allowed_states=[State.SUCCESS],
            failed_states=[State.SUCCESS],
            dag=self.dag,
        )

def test_templated_sensor(self):
    with self.dag:
        sensor = ExternalTaskSensor(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
        )

    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()

    assert sensor.external_dag_id == f"dag_{DEFAULT_DATE.date()}"
    assert sensor.external_task_id == f"task_{DEFAULT_DATE.date()}"

def test_external_task_sensor_fn_kwargs(self):
    """Check that this task sensor passes multiple kwargs from the full context. No failure means a clean run."""
    self.test_time_sensor()

    def my_func(dt, ds_nodash, tomorrow_ds_nodash):
        assert ds_nodash == dt.strftime("%Y%m%d")
        assert tomorrow_ds_nodash == (dt + timedelta(days=1)).strftime("%Y%m%d")
        return dt + timedelta(0)

    op1 = ExternalTaskSensor(
        task_id='test_external_task_sensor_fn_kwargs',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=my_func,
        allowed_states=['success'],
        dag=self.dag,
    )
    op1.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

def test_external_task_sensor_error_delta_and_fn(self):
    self.test_time_sensor()
    # Test that providing execution_delta and a function raises an error
    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check_delta',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_delta=timedelta(0),
            execution_date_fn=lambda dt: dt,
            allowed_states=['success'],
            dag=self.dag,
        )

def _get_external_task_sensor(self, from_task_id: str, to_task_id: str) -> ExternalTaskSensor:
    from_pipeline_name = self._task_graph.get_node(from_task_id).obj.pipeline_name
    from_task_name = self._task_graph.get_node(from_task_id).obj.name
    from_pipeline_schedule = self._task_graph.get_node(from_task_id).obj.pipeline.schedule
    to_pipeline_schedule = self._task_graph.get_node(to_task_id).obj.pipeline.schedule
    return ExternalTaskSensor(
        task_id=f"{from_pipeline_name}-{from_task_name}-sensor",
        external_dag_id=from_pipeline_name,
        external_task_id=from_task_name,
        execution_date_fn=self._get_execution_date_fn(from_pipeline_schedule, to_pipeline_schedule),
        mode=conf.EXTERNAL_SENSOR_MODE,
        poke_interval=conf.EXTERNAL_SENSOR_POKE_INTERVAL,
        timeout=conf.EXTERNAL_SENSOR_TIMEOUT,
    )

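# Hypothetical sketch of the _get_execution_date_fn factory referenced above (written here
# as a free function); the real implementation is not shown in the snippet. The assumption
# is that it maps the downstream DAG's execution_date to the most recent upstream schedule
# tick at or before it, using croniter, which Airflow already depends on. The downstream
# schedule parameter is kept only to mirror the call signature.
from datetime import datetime, timedelta
from typing import Callable

from croniter import croniter


def _get_execution_date_fn(from_schedule: str, to_schedule: str) -> Callable[[datetime], datetime]:
    def _execution_date_fn(execution_date: datetime) -> datetime:
        # Nudge the base forward so an upstream tick that coincides exactly with the
        # downstream execution_date still counts as "at or before".
        base = execution_date + timedelta(seconds=1)
        return croniter(from_schedule, base).get_prev(datetime)

    return _execution_date_fn
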
def dag_bag_cyclic():
    """
    Create a DagBag with DAGs having cyclic dependencies set up by ExternalTaskMarker and
    ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                  ^          |
                  |          |
    dag_1:        |          ---> task_a_1 >> task_b_1
                  |                               |
                  ---------------------------------
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(
        task_id="task_b_0", external_dag_id="dag_1", external_task_id="task_a_1", recursion_depth=3, dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(
        task_id="task_a_1", external_dag_id=dag_0.dag_id, external_task_id=task_b_0.task_id, dag=dag_1)
    task_b_1 = ExternalTaskMarker(
        task_id="task_b_1", external_dag_id="dag_0", external_task_id="task_a_0", recursion_depth=2, dag=dag_1)
    task_a_1 >> task_b_1

    for dag in [dag_0, dag_1]:
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag

def dag_bag_head_tail():
    """
    Create a DagBag containing one DAG, with task "head" depending on task "tail" of the
    previous execution_date.

    20200501     20200502                  20200510
    +------+     +------+                  +------+
    | head |    -->head |    -->        -->head |
    |  |   |   / |  |   |   /          /   |  |   |
    |  v   |  /  |  v   |  /          /    |  v   |
    | body | /   | body | /    ...   /     | body |
    |  |   |/    |  |   |/          /      |  |   |
    |  v   /     |  v   /          /       |  v   |
    | tail/|     | tail/|         /        | tail |
    +------+     +------+                  +------+
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    with DAG("head_tail", start_date=DEFAULT_DATE, schedule_interval="@daily") as dag:
        head = ExternalTaskSensor(
            task_id='head',
            external_dag_id=dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body = DummyOperator(task_id="body")
        tail = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=dag.dag_id,
            external_task_id=head.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head >> body >> tail

    dag_bag.bag_dag(dag=dag, root_dag=dag)

    yield dag_bag

def create_sub_dag(parent_dag, sub_dag_name, start_date, schedule_interval):
    with DAG(dag_id=f'{parent_dag}.{sub_dag_name}',
             start_date=start_date,
             schedule_interval=schedule_interval) as dag:
        # senses whether the external dag has started
        task_sensor = ExternalTaskSensor(
            task_id='task_sensor',
            external_dag_id=external_dag,
            external_task_id=None,
            poke_interval=15,
        )
        # prints results
        print_results = PythonOperator(
            task_id='print_results',
            python_callable=print_res,
            op_args=[external_task, external_dag],
        )
        # removes file
        remove_file = BashOperator(task_id='remove_file', bash_command=f'rm -f {path}')
        # creates a file with an appropriate timestamp
        create_timestamp = BashOperator(
            task_id='create_timestamp',
            bash_command='touch ~/timestamp_{{ ts_nodash }}',
        )
        task_sensor >> print_results >> remove_file >> create_timestamp
    return dag

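# Hypothetical sketch of the print_res callable wired into print_results above; the real
# callable is not shown in the snippet. The assumption is that it only reports which
# external task/DAG the sensor waited on.
def print_res(external_task, external_dag):
    print(f"External task '{external_task}' in DAG '{external_dag}' finished; continuing this run.")
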
HOST_DATA_DIR = os.environ["HOST_DATA_DIR"]

DATA_RAW_PATH = "/data/raw/{{ ds }}"
DATA_SPLIT_PATH = "/data/split/{{ ds }}"
DATA_TRANSFORMED_PATH = "/data/transformed/{{ ds }}"
MODEL_PATH = "/data/model/{{ ds }}"

with DAG(
    "train_pipeline",
    default_args=default_args,
    schedule_interval="@weekly",
    start_date=days_ago(30),
) as dag:
    data_sensor = ExternalTaskSensor(
        task_id="data-sensor",
        external_dag_id="download",
        external_task_id="download",
        check_existence=True,
        timeout=30,
    )
    split = DockerOperator(
        image="airflow-split",
        command=f"-l {DATA_RAW_PATH} -s {DATA_SPLIT_PATH}",
        network_mode="bridge",
        task_id="split",
        do_xcom_push=False,
        auto_remove=True,
        volumes=[f"{HOST_DATA_DIR}:/data"],
    )
    fit_transformer = DockerOperator(

          schedule_interval='0 12 * * 1')

pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password']

tasks_to_wait_for = [
    'fact_hospital_transaction_clarity',
    'dim_hospital_account_clarity',
    'dim_guarantor_clarity',
    'bridge_hospital_account_coverage_clarity',
    'dim_payor_plan_clarity',
    'dim_patient_clarity',
]

tasks = []

for t in tasks_to_wait_for:
    task = ExternalTaskSensor(
        external_dag_id='run_master_etl',
        external_task_id='exec_ebi_{}_logic'.format(t),
        execution_delta=timedelta(days=-6, hours=7, minutes=20),
        task_id='wait_for_{}'.format(t),
        dag=dag,
    )
    tasks.append(task)

path = 'C:\\Airflow\\send_bmt'

ebi_db_server_prod = Variable.get('ebi_db_server_prod')
airflow_server_prod = Variable.get('airflow_server_prod')

# -S server, -d database
# -E trusted connection, -i input file
# -o output file, -s use comma to separate fields
# -W trim white space, -X security measure for automated envs
query_cmd = (f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E '
             f'-i {path}\\bmt_query.sql '

    # 'start_date': datetime.datetime(2021, 8, 2, 0, 0),
    'email': '*****@*****.**',
    'email_on_failure': False,
    'email_on_retry': False,
    # 'schedule_interval': '@once',
    'retries': 0,
    'retry_delay': timedelta(minutes=30),
}

with DAG(dag_id=DAG_NAME,
         default_args=args,
         start_date=datetime.datetime(2021, 8, 5, 20, 0),
         schedule_interval='0 13,14,15,16,17,18,19,20,21,22,23,0,1 * * *',
         tags=['HOM', 'Movimientos', 'Cuentas']) as dag:

    sensor_cuenta_trn = ExternalTaskSensor(
        task_id='sensor_trn_cuenta',
        external_dag_id='dag-sii-bch-ing-ab-trn-cue-mov',
        external_task_id='trn_cuenta',
    )

    sensor_movimientos_trn = ExternalTaskSensor(
        task_id='sensor_trn_movimientos',
        external_dag_id='dag-sii-bch-ing-ab-trn-cue-mov',
        external_task_id='trn_movimientos',
    )

    start = DummyOperator(task_id='start')

    hom_cuenta = SubDagOperator(
        task_id='hom_cuenta',
        subdag=subdag(
            DAG_NAME,
            'hom_cuenta',
            args,
            'gs://yas-sii-int-des-dev/AB/config/PAR_SII_BCH_ELT_AB_TRN_HOM_CUENTA.json'
        ),
    )

    'retry_delay': timedelta(minutes=5)
}

dag = DAG(
    'load_data_warehouse',
    default_args=default_args,
    description='Load Data Warehouse',
    schedule_interval='@daily',
    start_date=days_ago(1),
    tags=['dw'],
    is_paused_upon_creation=False,
)

wait_for_init = ExternalTaskSensor(
    task_id='wait_for_init',
    external_dag_id='initialize_etl_environment',
    execution_date_fn=lambda x: datetime(2021, 1, 1, 0, 0, 0, 0, pytz.UTC),
    timeout=1,
    dag=dag,
)

wait_for_oltp = ExternalTaskSensor(
    task_id='wait_for_oltp',
    external_dag_id='import_main_data',
    execution_date_fn=lambda x: get_most_recent_dag_run('import_main_data').execution_date,
    timeout=120,
    dag=dag,
)

wait_for_flat_files = ExternalTaskSensor(
    task_id='wait_for_flat_files',

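# Hypothetical sketch of the get_most_recent_dag_run helper used above; the real helper is
# not included in the snippet. The assumption is that it returns the latest DagRun of the
# given DAG (or None if the DAG has never run, in which case the lambda above would fail).
from airflow.models import DagRun


def get_most_recent_dag_run(dag_id):
    dag_runs = DagRun.find(dag_id=dag_id)
    dag_runs.sort(key=lambda run: run.execution_date, reverse=True)
    return dag_runs[0] if dag_runs else None
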
import datetime

import airflow.utils.dates
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.sensors.external_task import ExternalTaskSensor

dag1 = DAG(
    dag_id="figure_6_20_dag_1",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 16 * * *",
)
dag2 = DAG(
    dag_id="figure_6_20_dag_2",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 18 * * *",
)

DummyOperator(task_id="copy_to_raw", dag=dag1) >> DummyOperator(
    task_id="process_supermarket", dag=dag1
)

wait = ExternalTaskSensor(
    task_id="wait_for_process_supermarket",
    external_dag_id="figure_6_20_dag_1",
    external_task_id="process_supermarket",
    execution_delta=datetime.timedelta(hours=6),
    dag=dag2,
)
report = DummyOperator(task_id="report", dag=dag2)
wait >> report

    # 'start_date': datetime.datetime(2021, 8, 2, 0, 0),
    'email': '*****@*****.**',
    'email_on_failure': False,
    'email_on_retry': False,
    # 'schedule_interval': '@once',
    'retries': 0,
    'retry_delay': timedelta(minutes=30),
}

with DAG(dag_id=DAG_NAME,
         default_args=args,
         start_date=datetime.datetime(2021, 8, 5, 18, 0),
         schedule_interval='0 13,14,15,16,17,18,19,20,21,22,23,0,1 * * *',
         tags=['TRN', 'Movimientos', 'Cuentas']) as dag:

    sensor_cuenta_raw = ExternalTaskSensor(
        task_id='sensor_raw_cuenta',
        external_dag_id='dag-sii-bch-ing-ab-raw-cue-mov',
        external_task_id='sii-bch-ing-ab-raw-cuenta',
    )

    sensor_movimientos_raw = ExternalTaskSensor(
        task_id='sensor_raw_movimientos',
        external_dag_id='dag-sii-bch-ing-ab-raw-cue-mov',
        external_task_id='sii-bch-ing-ab-raw-movimientos',
    )

    start = DummyOperator(task_id='start')

    trn_cuenta = SubDagOperator(
        task_id='trn_cuenta',
        subdag=subdag(
            DAG_NAME,
            'trn_cuenta',
            args,
            'gs://yas-sii-int-des-dev/AB/config/PAR_SII_BCH_ELT_AB_TRN_HOM_CUENTA.json'
        ),
    )

def sub_dag_processing():
    """
    SubDAG:
    external_sensor (waits for the DB_1 update) ->
    print_log (pulls the XCom with the row count from DB_1 and prints it) ->
    delete_file (removes the run file in TRIGGER_DIR) ->
    print_status (prints task_instance details)
    plus a nearby example of TaskGroup
    """
    sub_dag = DAG(
        dag_id=f"{DAG_ID}.{SUB_DAG_ID}",
        default_args=DEFAULT_ARGS,
        schedule_interval=CONFIGS[DAG_ID]["schedule_interval"],
        start_date=CONFIGS[DAG_ID]["start_date"],
        tags=["example"],
    )

    with sub_dag:

        @task()
        def print_logs():
            context = get_current_context()
            for db in DBS:
                msg = context["ti"].xcom_pull(
                    task_ids="query",
                    dag_id=f"dag_id_{db}",
                    key=f"{db}_rows_count",
                    include_prior_dates=True,
                )
                print(f"the pulled message is: {msg}")

        def create_section():
            """
            Create tasks in the outer section.
            The link in the course is broken, so this example was copied from gridU.
            """
            dummies = [DummyOperator(task_id=f'task-{i + 1}') for i in range(5)]

            with TaskGroup("inside_section_1") as inside_section_1:
                _ = [DummyOperator(task_id=f'task-{i + 1}') for i in range(3)]

            with TaskGroup("inside_section_2") as inside_section_2:
                _ = [DummyOperator(task_id=f'task-{i + 1}') for i in range(3)]

            dummies[-1] >> inside_section_1
            dummies[-2] >> inside_section_2

        ext_sensor = ExternalTaskSensor(
            task_id="waiting_for_DB_1_update",
            external_dag_id=DAG_ID,
            external_task_id="trigger_database_update",
            # execution_delta=timedelta(minutes=5)
        )
        task_print_logs = print_logs()
        task_remove_file = BashOperator(task_id="delete_run_file", bash_command=f"rm {TRIGGER_DIR}")
        task_finished = BashOperator(task_id="finish_op", bash_command="echo {{ts_nodash}} ")

        ext_sensor >> task_print_logs >> task_remove_file >> task_finished

        start = DummyOperator(task_id="start")

        with TaskGroup("section_1", tooltip="Tasks for Section 1") as section_1:
            create_section()

        some_other_task = DummyOperator(task_id="some-other-task")

        with TaskGroup("section_2", tooltip="Tasks for Section 2") as section_2:
            create_section()

        end = DummyOperator(task_id='end')

        start >> section_1 >> some_other_task >> section_2 >> end

    return sub_dag

def test_external_task_sensor_fn_multiple_execution_dates(self):
    bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
    dag_external_id = TEST_DAG_ID + '_external'
    dag_external = DAG(dag_external_id, default_args=self.args, schedule_interval=timedelta(seconds=1))
    task_external_with_failure = BashOperator(
        task_id="task_external_with_failure", bash_command=bash_command_code, retries=0, dag=dag_external)
    task_external_without_failure = DummyOperator(
        task_id="task_external_without_failure", retries=0, dag=dag_external)

    task_external_without_failure.run(
        start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True)

    session = settings.Session()
    TI = TaskInstance
    try:
        task_external_with_failure.run(
            start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True)
        # The task_external_with_failure task is expected to fail
        # once per minute (the run on the first second of
        # each minute).
    except Exception as e:  # pylint: disable=broad-except
        failed_tis = (
            session.query(TI)
            .filter(
                TI.dag_id == dag_external_id,
                TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE + timedelta(seconds=1),
            )
            .all()
        )
        if len(failed_tis) == 1 and failed_tis[0].task_id == 'task_external_with_failure':
            pass
        else:
            raise e

    dag_id = TEST_DAG_ID
    dag = DAG(dag_id, default_args=self.args, schedule_interval=timedelta(minutes=1))
    task_without_failure = ExternalTaskSensor(
        task_id='task_without_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_without_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )
    task_with_failure = ExternalTaskSensor(
        task_id='task_with_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_with_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )

    task_without_failure.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with self.assertRaises(AirflowSensorTimeout):
        task_with_failure.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

with DAG(dag_id='external_task_marker_parent',
         start_date=start_date,
         schedule_interval=None,
         tags=['tms_practice']) as parent_dag:
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="external_task_marker_child",
        external_task_id="child_task1",
    )

with DAG(
    dag_id="external_task_marker_child",
    start_date=start_date,
    schedule_interval=None,
    tags=['tms_practice'],
) as child_dag:
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=parent_task.task_id,
        timeout=600,
        allowed_states=['success'],
        failed_states=['failed', 'skipped'],
        mode='reschedule',
    )
    child_task2 = DummyOperator(task_id='child_task2')
    child_task1 >> child_task2

def dag_bag_ext():
    """
    Create a DagBag with DAGs looking like this. The dotted lines represent external dependencies
    set up using ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:               ---> task_a_1 >> task_b_1
                                              |
                                              |
    dag_2:                                ---> task_a_2 >> task_b_2
                                                               |
                                                               |
    dag_3:                                                 ---> task_a_3 >> task_b_3
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(
        task_id="task_b_0", external_dag_id="dag_1", external_task_id="task_a_1", recursion_depth=3, dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(
        task_id="task_a_1", external_dag_id=dag_0.dag_id, external_task_id=task_b_0.task_id, dag=dag_1)
    task_b_1 = ExternalTaskMarker(
        task_id="task_b_1", external_dag_id="dag_2", external_task_id="task_a_2", recursion_depth=2, dag=dag_1)
    task_a_1 >> task_b_1

    dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_2 = ExternalTaskSensor(
        task_id="task_a_2", external_dag_id=dag_1.dag_id, external_task_id=task_b_1.task_id, dag=dag_2)
    task_b_2 = ExternalTaskMarker(
        task_id="task_b_2", external_dag_id="dag_3", external_task_id="task_a_3", recursion_depth=1, dag=dag_2)
    task_a_2 >> task_b_2

    dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_3 = ExternalTaskSensor(
        task_id="task_a_3", external_dag_id=dag_2.dag_id, external_task_id=task_b_2.task_id, dag=dag_3)
    task_b_3 = DummyOperator(task_id="task_b_3", dag=dag_3)
    task_a_3 >> task_b_3

    for dag in [dag_0, dag_1, dag_2, dag_3]:
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag

    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG(
    'import_main_data',
    default_args=default_args,
    description='Import Main Transactions Files',
    schedule_interval='@daily',
    start_date=days_ago(1),
    is_paused_upon_creation=False,
)

wait_for_init = ExternalTaskSensor(
    task_id='wait_for_init',
    external_dag_id='initialize_etl_environment',
    execution_date_fn=lambda x: datetime(2021, 1, 1, 0, 0, 0, 0, pytz.UTC),
    timeout=1,
    dag=dag,
)

import_transactions_task = BashOperator(
    task_id='import_transactions',
    bash_command=f"""psql {AIRFLOW_CONN_SALES_OLTP} -c "\\copy transactions to stdout" | psql {AIRFLOW_CONN_SALES_DW} -c "\\copy import.transactions(transaction_id, customer_id, product_id, amount, qty, channel_id, bought_date) from stdin" """,
    dag=dag,
)

import_channels_task = BashOperator(
    task_id='import_channels',
    bash_command=f"""psql {AIRFLOW_CONN_SALES_OLTP} -c "\\copy channels to stdout" | psql {AIRFLOW_CONN_SALES_DW} -c "\\copy import.channels(channel_id, channel_name) from stdin" """,
    dag=dag,