Exemple #1
0
def task_sensor():
    """
    Declare a marker DAG and a sensor DAG that refer to each other.

    TODO: this sample does not work reliably yet.

    When two DAGs are declared in the same file, both DAGs are created,
    and each DAG may reference the other one.
    """
    marker_dag_id = "task_marker_test"
    sensor_dag_id = "task_sensor_test"
    sensor_task_id = "child_task1"

    # Marker side: names the sensor task of the other DAG as its external target.
    with makeDag(marker_dag_id) as upstream_dag:
        marker = ExternalTaskMarker(
            task_id="parent_task",
            external_dag_id=sensor_dag_id,
            external_task_id=sensor_task_id,
        )

    # Sensor side: points back at the marker task, then runs a follow-up task.
    with makeDag(sensor_dag_id):
        sensor = ExternalTaskSensor(
            task_id=sensor_task_id,
            external_dag_id=upstream_dag.dag_id,
            external_task_id=marker.task_id,
            timeout=600,
            allowed_states=['success'],
            failed_states=['failed', 'skipped'],
            mode="reschedule",
        )
        follow_up = DummyOperator(task_id="child_task2")
        sensor.set_downstream(follow_up)
Exemple #2
0
def dag_bag_multiple():
    """
    Yield a DagBag with two daily DAGs linked by multiple ExternalTaskMarker tasks.

    ``agg_dag`` holds a ``start`` task followed by 25 markers. Every marker targets the
    single task of ``daily_dag``; marker ``i`` gets an ``execution_date`` template that
    passes ``-1 * i`` to ``macros.ds_add``.
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    shared_dag_args = {"start_date": DEFAULT_DATE, "schedule_interval": "@daily"}
    daily_dag = DAG("daily_dag", **shared_dag_args)
    agg_dag = DAG("agg_dag", **shared_dag_args)
    for registered in (daily_dag, agg_dag):
        bag.bag_dag(dag=registered, root_dag=registered)

    daily_task = DummyOperator(task_id="daily_tas", dag=daily_dag)
    start = DummyOperator(task_id="start", dag=agg_dag)

    # Template with a single %s slot for the day offset of each marker.
    shifted_date_template = "{{ macros.ds_add(ds, -1 * %s) }}"
    for offset in range(25):
        marker = ExternalTaskMarker(
            task_id=f"{daily_task.task_id}_{offset}",
            external_dag_id=daily_dag.dag_id,
            external_task_id=daily_task.task_id,
            execution_date=shifted_date_template % offset,
            dag=agg_dag,
        )
        start.set_downstream(marker)

    yield bag
Exemple #3
0
def dag_bag_cyclic():
    """
    Return a DagBag whose two DAGs reference each other in a cycle through
    ExternalTaskMarker and ExternalTaskSensor tasks.

    dag_0:   task_a_0 >> task_b_0
                  ^          |
                  |          |
    dag_1:        |          ---> task_a_1 >> task_b_1
                  |                               |
                  ---------------------------------

    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    unscheduled = {"start_date": DEFAULT_DATE, "schedule_interval": None}

    # dag_0: a plain task followed by a marker that targets dag_1's sensor.
    first_dag = DAG("dag_0", **unscheduled)
    first_entry = DummyOperator(task_id="task_a_0", dag=first_dag)
    first_marker = ExternalTaskMarker(
        task_id="task_b_0",
        external_dag_id="dag_1",
        external_task_id="task_a_1",
        recursion_depth=3,
        dag=first_dag,
    )
    first_entry.set_downstream(first_marker)

    # dag_1: a sensor on dag_0's marker, followed by a marker that points back
    # at dag_0's first task, which closes the cycle.
    second_dag = DAG("dag_1", **unscheduled)
    second_sensor = ExternalTaskSensor(
        task_id="task_a_1",
        external_dag_id=first_dag.dag_id,
        external_task_id=first_marker.task_id,
        dag=second_dag,
    )
    second_marker = ExternalTaskMarker(
        task_id="task_b_1",
        external_dag_id="dag_0",
        external_task_id="task_a_0",
        recursion_depth=2,
        dag=second_dag,
    )
    second_sensor.set_downstream(second_marker)

    for member in (first_dag, second_dag):
        bag.bag_dag(dag=member, root_dag=member)

    return bag
    def test_serialized_external_task_marker(self):
        """Serialize then deserialize an ExternalTaskMarker and check its identifying fields."""
        marker_dag = DAG('test_serialized_external_task_marker',
                         start_date=DEFAULT_DATE)
        marker = ExternalTaskMarker(
            task_id="parent_task",
            external_dag_id="external_task_marker_child",
            external_task_id="child_task1",
            dag=marker_dag,
        )

        # Round-trip through the serialized representation.
        round_tripped = SerializedBaseOperator.deserialize_operator(
            SerializedBaseOperator.serialize_operator(marker))

        assert round_tripped.task_type == 'ExternalTaskMarker'
        assert round_tripped.external_dag_id == 'external_task_marker_child'
        assert round_tripped.external_task_id == 'child_task1'
Exemple #5
0
def dag_bag_head_tail():
    """
    Yield a DagBag with a single daily DAG in which task "head" depends on task "tail"
    of the previous execution_date.

    20200501     20200502                 20200510
    +------+     +------+                 +------+
    | head |    -->head |    -->         -->head |
    |  |   |   / |  |   |   /           / |  |   |
    |  v   |  /  |  v   |  /           /  |  v   |
    | body | /   | body | /     ...   /   | body |
    |  |   |/    |  |   |/           /    |  |   |
    |  v   /     |  v   /           /     |  v   |
    | tail/|     | tail/|          /      | tail |
    +------+     +------+                 +------+
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    head_tail_dag = DAG("head_tail", start_date=DEFAULT_DATE,
                        schedule_interval="@daily")
    with head_tail_dag:
        # "head" senses "tail" of this same DAG, offset by a one-day execution_delta.
        head = ExternalTaskSensor(
            task_id='head',
            external_dag_id=head_tail_dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body = DummyOperator(task_id="body")
        # "tail" marks "head" of this same DAG using the tomorrow_ds_nodash template.
        tail = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=head_tail_dag.dag_id,
            external_task_id=head.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head.set_downstream(body)
        body.set_downstream(tail)

    bag.bag_dag(dag=head_tail_dag, root_dag=head_tail_dag)

    yield bag
import datetime
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.sensors.external_task import ExternalTaskMarker, ExternalTaskSensor

start_date = datetime.datetime(2015, 1, 1)

# Parent DAG: holds the marker task that points at the child DAG's sensor task.
with DAG(dag_id='external_task_marker_parent',
         start_date=start_date,
         schedule_interval=None,
         tags=['tms_practice']) as parent_dag:

    # The keyword must be ``external_task_id``; the previous spelling
    # ``external_tax_id`` left the required argument unset and passed an
    # unknown one instead. The target is the task "child_task1" of the
    # DAG "external_task_marker_child".
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="external_task_marker_child",
        external_task_id="child_task1",
    )

    with DAG(
            dag_id="external_task_marker_child",
            start_date=start_date,
            schedule_interval=None,
            tags=['tms_practice'],
    ) as child_dag:

        child_task1 = ExternalTaskSensor(
            task_id="child_task1",
            external_dag_id=parent_dag.dag_id,
            external_task_id=parent_task.task_id,
            timeout=600,
            allowed_states=['success'],
Exemple #7
0
def dag_bag_ext():
    """
    Return a DagBag with four DAGs chained as shown below. The dotted lines represent
    external dependencies set up using ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    chain_length = 4
    last_level = chain_length - 1
    chained_dags = []
    upstream_marker = None

    for level in range(chain_length):
        current_dag = DAG(f"dag_{level}", start_date=DEFAULT_DATE, schedule_interval=None)

        # The first DAG starts with a plain task; every later DAG starts with a
        # sensor on the marker that closed the previous DAG.
        if level == 0:
            entry_task = DummyOperator(task_id=f"task_a_{level}", dag=current_dag)
        else:
            entry_task = ExternalTaskSensor(task_id=f"task_a_{level}",
                                            external_dag_id=chained_dags[-1].dag_id,
                                            external_task_id=upstream_marker.task_id,
                                            dag=current_dag)

        # Every DAG but the last ends with a marker aimed at the next DAG's entry
        # task; recursion_depth counts down 3, 2, 1 along the chain.
        if level < last_level:
            exit_task = ExternalTaskMarker(task_id=f"task_b_{level}",
                                           external_dag_id=f"dag_{level + 1}",
                                           external_task_id=f"task_a_{level + 1}",
                                           recursion_depth=last_level - level,
                                           dag=current_dag)
            upstream_marker = exit_task
        else:
            exit_task = DummyOperator(task_id=f"task_b_{level}", dag=current_dag)

        entry_task >> exit_task
        chained_dags.append(current_dag)

    for chained_dag in chained_dags:
        bag.bag_dag(dag=chained_dag, root_dag=chained_dag)

    return bag
Exemple #8
0
 def test_serialized_fields(self):
     """Check that "recursion_depth" is among ExternalTaskMarker's serialized fields."""
     serialized_fields = ExternalTaskMarker.get_serialized_fields()
     required_fields = {"recursion_depth"}
     self.assertTrue(required_fields.issubset(serialized_fields))
Exemple #9
0
from airflow_utils import set_dag_id

# Parent DAG: start -> do_something -> end, where "end" is an ExternalTaskMarker.
with DAG(
    dag_id=set_dag_id(__file__) + '-parent',
    start_date=days_ago(1),
    schedule_interval="@daily",
) as parent_dag:

    start = DummyOperator(task_id='start')

    do_something = BashOperator(
        task_id='do_something',
        bash_command="sleep 10s",
    )

    # The marker is used so that clearing this task also clears the child task it names.
    end = ExternalTaskMarker(
        task_id="end",
        external_dag_id="dag-dependency-child",
        external_task_id="child_task1",
    )

    start.set_downstream(do_something)
    do_something.set_downstream(end)

with DAG(dag_id=set_dag_id(__file__) + '-child',
         start_date=days_ago(1),
         schedule_interval="@daily") as child_dag:

    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=end.task_id,
        timeout=600,
        allowed_states=['success'],