def render_dag(dag: DAG, tis: Optional[List[TaskInstance]] = None) -> graphviz.Digraph:
    """
    Renders the DAG object to the DOT object.

    If a task instance list is passed, the nodes will be painted according to task statuses.
    Edges carry the optional label registered on the DAG for that edge.

    :param dag: DAG that will be rendered.
    :type dag: airflow.models.dag.DAG
    :param tis: List of task instances
    :type tis: Optional[List[TaskInstance]]
    :return: Graphviz object
    :rtype: graphviz.Digraph
    """
    graph_attributes = {
        "rankdir": dag.orientation or "LR",  # fall back to left-to-right layout
        "labelloc": "t",
        "label": dag.dag_id,
    }
    dot = graphviz.Digraph(dag.dag_id, graph_attr=graph_attributes)

    # Only build the task_id -> state mapping when task instances were supplied.
    states_by_task_id = {ti.task_id: ti.state for ti in tis} if tis is not None else None
    _draw_nodes(dag.task_group, dot, states_by_task_id)

    for edge in dag_edges(dag):
        source_id, target_id = edge["source_id"], edge["target_id"]
        # The label is None when nothing was specified for this edge; it is
        # passed through to graphviz unchanged.
        edge_label = dag.get_edge_info(source_id, target_id).get("label")
        dot.edge(source_id, target_id, edge_label)

    return dot
def render_dag(dag: DAG, tis: Optional[List[TaskInstance]] = None) -> graphviz.Digraph:
    """
    Renders the DAG object to the DOT object.

    If a task instance list is passed, the nodes will be painted according to task statuses.
    Each edge is drawn with the optional label stored in the DAG's edge info.

    :param dag: DAG that will be rendered.
    :type dag: airflow.models.dag.DAG
    :param tis: List of task instances
    :type tis: Optional[List[TaskInstance]]
    :return: Graphviz object
    :rtype: graphviz.Digraph
    """
    dot = graphviz.Digraph(
        dag.dag_id,
        graph_attr={
            "rankdir": dag.orientation or "LR",  # default to left-to-right layout
            "labelloc": "t",
            "label": dag.dag_id,
        },
    )

    # Build the task_id -> state mapping only when task instances were given.
    states_by_task_id = None
    if tis is not None:
        states_by_task_id = {ti.task_id: ti.state for ti in tis}
    _draw_nodes(dag.task_group, dot, states_by_task_id)

    for edge in dag_edges(dag):
        source_id, target_id = edge["source_id"], edge["target_id"]
        # Look up the optional edge label so labelled dependencies are not
        # silently dropped from the rendering. The label is None when the
        # edge has none, in which case it is simply passed through as None.
        label = dag.get_edge_info(source_id, target_id).get("label")
        dot.edge(source_id, target_id, label)

    return dot
def test_dag_edges():
    """
    Tests the node tree and edge list produced for a DAG with nested TaskGroups.
    """
    execution_date = pendulum.parse("20200101")

    # NOTE: the order of operator creation and dependency wiring below is kept
    # exactly as written; do not reorder these statements.
    with DAG("test_dag_edges", start_date=execution_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group_a") as group_a:
            with TaskGroup("group_b") as group_b:
                task2 = DummyOperator(task_id="task2")
                task3 = DummyOperator(task_id="task3")
                task4 = DummyOperator(task_id="task4")
                task2 >> [task3, task4]

            task5 = DummyOperator(task_id="task5")

            task5 << group_b

        task1 >> group_a

        with TaskGroup("group_c") as group_c:
            task6 = DummyOperator(task_id="task6")
            task7 = DummyOperator(task_id="task7")
            task8 = DummyOperator(task_id="task8")
            [task6, task7] >> task8
            group_a >> group_c

        task5 >> task8

        task9 = DummyOperator(task_id="task9")
        task10 = DummyOperator(task_id="task10")

        group_c >> [task9, task10]

        with TaskGroup("group_d") as group_d:
            task11 = DummyOperator(task_id="task11")
            task12 = DummyOperator(task_id="task12")
            task11 >> task12

        group_d << group_c

    nodes = task_group_to_dict(dag.task_group)
    edges = dag_edges(dag)

    # Expected hierarchy of node ids, including the join nodes of each group.
    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'group_a',
                'children': [
                    {
                        'id': 'group_a.group_b',
                        'children': [
                            {'id': 'group_a.group_b.task2'},
                            {'id': 'group_a.group_b.task3'},
                            {'id': 'group_a.group_b.task4'},
                            {'id': 'group_a.group_b.downstream_join_id'},
                        ],
                    },
                    {'id': 'group_a.task5'},
                    {'id': 'group_a.upstream_join_id'},
                    {'id': 'group_a.downstream_join_id'},
                ],
            },
            {
                'id': 'group_c',
                'children': [
                    {'id': 'group_c.task6'},
                    {'id': 'group_c.task7'},
                    {'id': 'group_c.task8'},
                    {'id': 'group_c.upstream_join_id'},
                    {'id': 'group_c.downstream_join_id'},
                ],
            },
            {
                'id': 'group_d',
                'children': [
                    {'id': 'group_d.task11'},
                    {'id': 'group_d.task12'},
                    {'id': 'group_d.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task10'},
            {'id': 'task9'},
        ],
    }
    assert extract_node_id(nodes) == expected_node_ids

    # Expected (source, target) pairs, listed in sorted order.
    expected_edges = [
        ('group_a.downstream_join_id', 'group_c.upstream_join_id'),
        ('group_a.group_b.downstream_join_id', 'group_a.task5'),
        ('group_a.group_b.task2', 'group_a.group_b.task3'),
        ('group_a.group_b.task2', 'group_a.group_b.task4'),
        ('group_a.group_b.task3', 'group_a.group_b.downstream_join_id'),
        ('group_a.group_b.task4', 'group_a.group_b.downstream_join_id'),
        ('group_a.task5', 'group_a.downstream_join_id'),
        ('group_a.task5', 'group_c.task8'),
        ('group_a.upstream_join_id', 'group_a.group_b.task2'),
        ('group_c.downstream_join_id', 'group_d.upstream_join_id'),
        ('group_c.downstream_join_id', 'task10'),
        ('group_c.downstream_join_id', 'task9'),
        ('group_c.task6', 'group_c.task8'),
        ('group_c.task7', 'group_c.task8'),
        ('group_c.task8', 'group_c.downstream_join_id'),
        ('group_c.upstream_join_id', 'group_c.task6'),
        ('group_c.upstream_join_id', 'group_c.task7'),
        ('group_d.task11', 'group_d.task12'),
        ('group_d.upstream_join_id', 'group_d.task11'),
        ('task1', 'group_a.upstream_join_id'),
    ]
    actual_edges = sorted((edge["source_id"], edge["target_id"]) for edge in edges)
    assert actual_edges == expected_edges
def test_sub_dag_task_group():
    """
    Tests dag.sub_dag() updates task_group correctly.
    """
    execution_date = pendulum.parse("20200101")

    # NOTE: statement order inside the DAG context is kept exactly as written.
    with DAG("test_test_task_group_sub_dag", start_date=execution_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234") as group234:
            _ = DummyOperator(task_id="task2")

            with TaskGroup("group34") as group34:
                _ = DummyOperator(task_id="task3")
                _ = DummyOperator(task_id="task4")

        with TaskGroup("group6") as group6:
            _ = DummyOperator(task_id="task6")

        task7 = DummyOperator(task_id="task7")
        task5 = DummyOperator(task_id="task5")

        task1 >> group234
        group34 >> task5
        group234 >> group6
        group234 >> task7

    # Keep only task5 and everything upstream of it.
    subdag = dag.sub_dag(task_ids_or_regex="task5", include_upstream=True, include_downstream=False)

    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {
                        'id': 'group234.group34',
                        'children': [
                            {'id': 'group234.group34.task3'},
                            {'id': 'group234.group34.task4'},
                            {'id': 'group234.group34.downstream_join_id'},
                        ],
                    },
                    {'id': 'group234.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task5'},
        ],
    }
    assert extract_node_id(task_group_to_dict(subdag.task_group)) == expected_node_ids

    expected_edges = [
        ('group234.group34.downstream_join_id', 'task5'),
        ('group234.group34.task3', 'group234.group34.downstream_join_id'),
        ('group234.group34.task4', 'group234.group34.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.group34.task3'),
        ('group234.upstream_join_id', 'group234.group34.task4'),
        ('task1', 'group234.upstream_join_id'),
    ]
    edges = dag_edges(subdag)
    actual_edges = sorted((edge["source_id"], edge["target_id"]) for edge in edges)
    assert actual_edges == expected_edges

    subdag_task_groups = subdag.task_group.get_task_group_dict()
    assert subdag_task_groups.keys() == {None, "group234", "group234.group34"}

    included_group_ids = {"group234", "group234.group34"}
    included_task_ids = {
        'group234.group34.task3',
        'group234.group34.task4',
        'task1',
        'task5',
    }

    # No group or task in the sub-DAG may reference anything that was left out.
    for group in subdag_task_groups.values():
        assert group.upstream_group_ids.issubset(included_group_ids)
        assert group.downstream_group_ids.issubset(included_group_ids)
        assert group.upstream_task_ids.issubset(included_task_ids)
        assert group.downstream_task_ids.issubset(included_task_ids)

    for member in subdag.task_group:
        assert member.upstream_task_ids.issubset(included_task_ids)
        assert member.downstream_task_ids.issubset(included_task_ids)
def test_build_task_group_with_task_decorator():
    """
    Test that TaskGroup can be used with the @task decorator.
    """
    from airflow.operators.python import task

    # The decorated function names must stay as they are: the expected node
    # ids asserted below are built from them.
    @task
    def task_1():
        print("task_1")

    @task
    def task_2():
        return "task_2"

    @task
    def task_3():
        return "task_3"

    @task
    def task_4(task_2_output, task_3_output):
        print(task_2_output, task_3_output)

    @task
    def task_5():
        print("task_5")

    execution_date = pendulum.parse("20200101")
    with DAG("test_build_task_group_with_task_decorator", start_date=execution_date) as dag:
        tsk_1 = task_1()

        with TaskGroup("group234") as group234:
            tsk_2 = task_2()
            tsk_3 = task_3()
            tsk_4 = task_4(tsk_2, tsk_3)

        tsk_5 = task_5()

        tsk_1 >> group234 >> tsk_5

    # Dependencies set on the group must show up on the operators inside it.
    # pylint: disable=no-member
    assert tsk_1.operator in tsk_2.operator.upstream_list
    assert tsk_1.operator in tsk_3.operator.upstream_list
    assert tsk_5.operator in tsk_4.operator.downstream_list
    # pylint: enable=no-member

    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {'id': 'group234.task_2'},
                    {'id': 'group234.task_3'},
                    {'id': 'group234.task_4'},
                    {'id': 'group234.upstream_join_id'},
                    {'id': 'group234.downstream_join_id'},
                ],
            },
            {'id': 'task_1'},
            {'id': 'task_5'},
        ],
    }
    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_node_ids

    expected_edges = [
        ('group234.downstream_join_id', 'task_5'),
        ('group234.task_2', 'group234.task_4'),
        ('group234.task_3', 'group234.task_4'),
        ('group234.task_4', 'group234.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.task_2'),
        ('group234.upstream_join_id', 'group234.task_3'),
        ('task_1', 'group234.upstream_join_id'),
    ]
    edges = dag_edges(dag)
    actual_edges = sorted((edge["source_id"], edge["target_id"]) for edge in edges)
    assert actual_edges == expected_edges