def dag_bag_multiple():
    """
    Build a DagBag holding two daily DAGs tied together by many ExternalTaskMarker tasks.

    ``daily_dag`` contains a single DummyOperator. ``agg_dag`` contains a
    ``start`` DummyOperator followed by 25 ExternalTaskMarker tasks, each of
    which points at the task in ``daily_dag`` and uses the templated
    execution date ``macros.ds_add(ds, -1 * i)`` for ``i`` in ``0..24``.

    Yields the populated DagBag.
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule_interval="@daily")
    agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule_interval="@daily")
    # Each DAG is registered as its own root.
    for each_dag in (daily_dag, agg_dag):
        bag.bag_dag(dag=each_dag, root_dag=each_dag)

    daily_task = DummyOperator(task_id="daily_tas", dag=daily_dag)
    start = DummyOperator(task_id="start", dag=agg_dag)

    # Jinja template for the marker's execution_date; %s is filled with the
    # loop offset before the string is handed to the operator.
    marker_date_template = "{{ macros.ds_add(ds, -1 * %s) }}"
    marker_count = 25
    for offset in range(marker_count):
        marker = ExternalTaskMarker(
            task_id=f"{daily_task.task_id}_{offset}",
            external_dag_id=daily_dag.dag_id,
            external_task_id=daily_task.task_id,
            execution_date=marker_date_template % offset,
            dag=agg_dag,
        )
        start >> marker

    yield bag
def dag_bag_head_tail():
    """
    Build a DagBag with a single DAG whose "head" task waits on the "tail"
    task of the previous execution_date.

    20200501     20200502                 20200510
    +------+     +------+                 +------+
    | head |    -->head |    -->         -->head |
    |  |   |   / |  |   |   /           / |  |   |
    |  v   |  /  |  v   |  /           /  |  v   |
    | body | /   | body | /     ...   /   | body |
    |  |   |/    |  |   |/           /    |  |   |
    |  v   /     |  v   /           /     |  v   |
    | tail/|     | tail/|          /      | tail |
    +------+     +------+                 +------+

    Yields the populated DagBag.
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    with DAG("head_tail", start_date=DEFAULT_DATE, schedule_interval="@daily") as head_tail_dag:
        # The sensor looks one day back (execution_delta) at "tail" in this same DAG.
        head_sensor = ExternalTaskSensor(
            task_id="head",
            external_dag_id=head_tail_dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body_task = DummyOperator(task_id="body")
        # The marker points at "head" in this same DAG, using the
        # "tomorrow_ds_nodash" template value as its execution_date.
        tail_marker = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=head_tail_dag.dag_id,
            external_task_id=head_sensor.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head_sensor >> body_task >> tail_marker

    bag.bag_dag(dag=head_tail_dag, root_dag=head_tail_dag)

    yield bag
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.sensors.external_task_sensor import ExternalTaskMarker, ExternalTaskSensor

# Both example DAGs below share this start date.
start_date = datetime.datetime(2015, 1, 1)

# Parent DAG: holds the marker that names the child DAG's sensor task.
with DAG(
    dag_id="example_external_task_marker_parent",
    start_date=start_date,
    schedule_interval=None,
    tags=["example"],
) as parent_dag:
    # [START howto_operator_external_task_marker]
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="example_external_task_marker_child",
        external_task_id="child_task1",
    )
    # [END howto_operator_external_task_marker]

# Child DAG: holds the sensor that refers back to the parent's marker task.
with DAG(
    dag_id="example_external_task_marker_child",
    start_date=start_date,
    schedule_interval=None,
    tags=["example"],
) as child_dag:
    # [START howto_operator_external_task_sensor]
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        mode="reschedule",
        external_dag_id=parent_dag.dag_id,
        external_task_id=parent_task.task_id,
    )
    # [END howto_operator_external_task_sensor]
def dag_bag_ext():
    """
    Build a DagBag of four DAGs chained through external dependencies.

    The dotted lines represent external dependencies set up using
    ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3

    Returns the populated DagBag.
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    final_level = 3
    chain = []
    prev_dag_id = None
    prev_marker_id = None

    for level in range(final_level + 1):
        current = DAG(f"dag_{level}", start_date=DEFAULT_DATE, schedule_interval=None)

        # First task: a plain dummy in dag_0, otherwise a sensor on the
        # previous DAG's marker task.
        if level == 0:
            entry = DummyOperator(task_id=f"task_a_{level}", dag=current)
        else:
            entry = ExternalTaskSensor(
                task_id=f"task_a_{level}",
                external_dag_id=prev_dag_id,
                external_task_id=prev_marker_id,
                dag=current,
            )

        # Second task: a marker naming the next DAG's sensor, except in the
        # last DAG where the chain ends with a plain dummy. recursion_depth
        # counts down 3, 2, 1 along the chain.
        if level == final_level:
            exit_task = DummyOperator(task_id=f"task_b_{level}", dag=current)
        else:
            exit_task = ExternalTaskMarker(
                task_id=f"task_b_{level}",
                external_dag_id=f"dag_{level + 1}",
                external_task_id=f"task_a_{level + 1}",
                recursion_depth=final_level - level,
                dag=current,
            )

        entry >> exit_task

        prev_dag_id = current.dag_id
        prev_marker_id = exit_task.task_id
        chain.append(current)

    # Positional call kept as-is: (dag, None, dag).
    # NOTE(review): confirm the middle argument matches the bag_dag signature
    # of the Airflow version in use.
    for member in chain:
        bag.bag_dag(member, None, member)

    return bag
def test_serialized_fields(self):
    """Check that ``recursion_depth`` is among ExternalTaskMarker's serialized fields."""
    # assertIn passes/fails under the same condition as the previous
    # assertTrue({...}.issubset(...)) form, but on failure it reports the
    # missing member and the container instead of "False is not true".
    self.assertIn("recursion_depth", ExternalTaskMarker.get_serialized_fields())