def render_dag(dag: DAG, tis: Optional[List[TaskInstance]] = None) -> graphviz.Digraph:
    """
    Build a Graphviz (DOT) graph object for the given DAG.

    If a task instance list is passed, the state of each instance is collected by task id
    and handed to the node-drawing helper so the nodes can be painted according to task
    statuses. Every edge is added with the label stored in the DAG's edge info, if any.

    :param dag: DAG that will be rendered.
    :type dag: airflow.models.dag.DAG
    :param tis: List of task instances
    :type tis: Optional[List[TaskInstance]]
    :return: Graphviz object
    :rtype: graphviz.Digraph
    """
    # Graph-level attributes: the layout direction falls back to "LR" when the DAG
    # has no orientation set, and the DAG id is used as the graph label.
    graph_attributes = {
        "rankdir": dag.orientation or "LR",
        "labelloc": "t",
        "label": dag.dag_id,
    }
    dot = graphviz.Digraph(dag.dag_id, graph_attr=graph_attributes)

    # Task id -> state lookup; remains None when no task instances were supplied.
    states_by_task_id = None if tis is None else {ti.task_id: ti.state for ti in tis}
    _draw_nodes(dag.task_group, dot, states_by_task_id)

    for edge in dag_edges(dag):
        source_id, target_id = edge["source_id"], edge["target_id"]
        # ``.get`` yields None when no label is recorded for this edge; that
        # maybe-None value is passed straight through to the graph.
        edge_label = dag.get_edge_info(source_id, target_id).get("label")
        dot.edge(source_id, target_id, edge_label)

    return dot
def render_dag(dag: DAG,
               tis: Optional[List[TaskInstance]] = None) -> graphviz.Digraph:
    """
    Convert a DAG into a Graphviz (DOT) graph object.

    If a task instance list is passed, a task id -> state mapping is built from it and
    given to the node-drawing helper so the nodes can be painted according to task
    statuses. Edges are added without labels.

    :param dag: DAG that will be rendered.
    :type dag: airflow.models.dag.DAG
    :param tis: List of task instances
    :type tis: Optional[List[TaskInstance]]
    :return: Graphviz object
    :rtype: graphviz.Digraph
    """
    # Use the DAG's own orientation when it has one, otherwise lay out "LR".
    rank_direction = dag.orientation or "LR"
    dot = graphviz.Digraph(
        dag.dag_id,
        graph_attr={"rankdir": rank_direction, "labelloc": "t", "label": dag.dag_id},
    )

    # No task instances given means no state information is passed along (None).
    if tis is None:
        states_by_task_id = None
    else:
        states_by_task_id = {instance.task_id: instance.state for instance in tis}

    _draw_nodes(dag.task_group, dot, states_by_task_id)

    for link in dag_edges(dag):
        dot.edge(link["source_id"], link["target_id"])

    return dot
Exemple #3
0
def test_dag_edges():
    """
    Check the node tree and the edge list produced for a DAG with nested task groups.

    The DAG wires single tasks and whole groups together (group_b nested inside group_a,
    followed by group_c and group_d). The assertions pin the ids obtained through
    ``task_group_to_dict`` and ``dag_edges``, including the ``upstream_join_id`` and
    ``downstream_join_id`` entries listed for the groups.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_dag_edges", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group_a") as group_a:
            with TaskGroup("group_b") as group_b:
                task2 = DummyOperator(task_id="task2")
                task3 = DummyOperator(task_id="task3")
                task4 = DummyOperator(task_id="task4")
                task2 >> [task3, task4]

            task5 = DummyOperator(task_id="task5")

            # A whole group is used as the upstream of a single task.
            task5 << group_b

        task1 >> group_a

        with TaskGroup("group_c") as group_c:
            task6 = DummyOperator(task_id="task6")
            task7 = DummyOperator(task_id="task7")
            task8 = DummyOperator(task_id="task8")
            [task6, task7] >> task8
            # Group-to-group dependency declared while group_c is still open.
            group_a >> group_c

        # Direct task-to-task dependency crossing a group boundary.
        task5 >> task8

        task9 = DummyOperator(task_id="task9")
        task10 = DummyOperator(task_id="task10")

        group_c >> [task9, task10]

        with TaskGroup("group_d") as group_d:
            task11 = DummyOperator(task_id="task11")
            task12 = DummyOperator(task_id="task12")
            task11 >> task12

        group_d << group_c

    nodes = task_group_to_dict(dag.task_group)
    edges = dag_edges(dag)

    expected_nodes = {
        'id': None,
        'children': [
            {
                'id': 'group_a',
                'children': [
                    {
                        'id': 'group_a.group_b',
                        'children': [
                            {'id': 'group_a.group_b.task2'},
                            {'id': 'group_a.group_b.task3'},
                            {'id': 'group_a.group_b.task4'},
                            {'id': 'group_a.group_b.downstream_join_id'},
                        ],
                    },
                    {'id': 'group_a.task5'},
                    {'id': 'group_a.upstream_join_id'},
                    {'id': 'group_a.downstream_join_id'},
                ],
            },
            {
                'id': 'group_c',
                'children': [
                    {'id': 'group_c.task6'},
                    {'id': 'group_c.task7'},
                    {'id': 'group_c.task8'},
                    {'id': 'group_c.upstream_join_id'},
                    {'id': 'group_c.downstream_join_id'},
                ],
            },
            {
                'id': 'group_d',
                'children': [
                    {'id': 'group_d.task11'},
                    {'id': 'group_d.task12'},
                    {'id': 'group_d.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task10'},
            {'id': 'task9'},
        ],
    }
    assert extract_node_id(nodes) == expected_nodes

    # Edges are compared as sorted (source, target) pairs so the order in which
    # dag_edges returns them does not matter.
    expected_edges = [
        ('group_a.downstream_join_id', 'group_c.upstream_join_id'),
        ('group_a.group_b.downstream_join_id', 'group_a.task5'),
        ('group_a.group_b.task2', 'group_a.group_b.task3'),
        ('group_a.group_b.task2', 'group_a.group_b.task4'),
        ('group_a.group_b.task3', 'group_a.group_b.downstream_join_id'),
        ('group_a.group_b.task4', 'group_a.group_b.downstream_join_id'),
        ('group_a.task5', 'group_a.downstream_join_id'),
        ('group_a.task5', 'group_c.task8'),
        ('group_a.upstream_join_id', 'group_a.group_b.task2'),
        ('group_c.downstream_join_id', 'group_d.upstream_join_id'),
        ('group_c.downstream_join_id', 'task10'),
        ('group_c.downstream_join_id', 'task9'),
        ('group_c.task6', 'group_c.task8'),
        ('group_c.task7', 'group_c.task8'),
        ('group_c.task8', 'group_c.downstream_join_id'),
        ('group_c.upstream_join_id', 'group_c.task6'),
        ('group_c.upstream_join_id', 'group_c.task7'),
        ('group_d.task11', 'group_d.task12'),
        ('group_d.upstream_join_id', 'group_d.task11'),
        ('task1', 'group_a.upstream_join_id'),
    ]
    actual_edges = sorted((edge["source_id"], edge["target_id"]) for edge in edges)
    assert actual_edges == expected_edges
Exemple #4
0
def test_sub_dag_task_group():
    """
    Tests dag.sub_dag() updates task_group correctly.

    A sub-DAG is selected around "task5" with upstream tasks included and downstream
    tasks excluded. The test then checks the resulting node tree, the edge list, the
    task group dict keys, and that every upstream/downstream relation recorded on the
    remaining groups and tasks stays within the included ids.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_test_task_group_sub_dag", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234") as group234:
            _ = DummyOperator(task_id="task2")

            with TaskGroup("group34") as group34:
                _ = DummyOperator(task_id="task3")
                _ = DummyOperator(task_id="task4")

        with TaskGroup("group6") as group6:
            _ = DummyOperator(task_id="task6")

        task7 = DummyOperator(task_id="task7")
        task5 = DummyOperator(task_id="task5")

        task1 >> group234
        group34 >> task5
        group234 >> group6
        group234 >> task7

    subdag = dag.sub_dag(
        task_ids_or_regex="task5",
        include_upstream=True,
        include_downstream=False,
    )

    expected_nodes = {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {
                        'id': 'group234.group34',
                        'children': [
                            {'id': 'group234.group34.task3'},
                            {'id': 'group234.group34.task4'},
                            {'id': 'group234.group34.downstream_join_id'},
                        ],
                    },
                    {'id': 'group234.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task5'},
        ],
    }
    assert extract_node_id(task_group_to_dict(subdag.task_group)) == expected_nodes

    edges = dag_edges(subdag)
    expected_edges = [
        ('group234.group34.downstream_join_id', 'task5'),
        ('group234.group34.task3', 'group234.group34.downstream_join_id'),
        ('group234.group34.task4', 'group234.group34.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.group34.task3'),
        ('group234.upstream_join_id', 'group234.group34.task4'),
        ('task1', 'group234.upstream_join_id'),
    ]
    assert sorted((edge["source_id"], edge["target_id"]) for edge in edges) == expected_edges

    # Only the root group (keyed by None) and the two groups on the path to task5
    # are expected in the sub-DAG's group dict.
    subdag_task_groups = subdag.task_group.get_task_group_dict()
    assert subdag_task_groups.keys() == {None, "group234", "group234.group34"}

    included_group_ids = {"group234", "group234.group34"}
    included_task_ids = {
        'group234.group34.task3',
        'group234.group34.task4',
        'task1',
        'task5',
    }

    # No group may keep a reference to a group or task outside the included ids.
    for group in subdag_task_groups.values():
        assert group.upstream_group_ids.issubset(included_group_ids)
        assert group.downstream_group_ids.issubset(included_group_ids)
        assert group.upstream_task_ids.issubset(included_task_ids)
        assert group.downstream_task_ids.issubset(included_task_ids)

    # The same holds for every task yielded by iterating the sub-DAG's task group.
    for member in subdag.task_group:
        assert member.upstream_task_ids.issubset(included_task_ids)
        assert member.downstream_task_ids.issubset(included_task_ids)
Exemple #5
0
def test_build_task_group_with_task_decorator():
    """
    Test that TaskGroup can be used with the @task decorator.

    Five decorated callables are instantiated, three of them inside a task group, and
    the group is chained between the first and the last task. The test checks the
    operator-level upstream/downstream lists as well as the node tree and edge list.
    """
    from airflow.operators.python import task

    # The function names below are kept as-is: the ids asserted further down
    # ('task_1', 'group234.task_2', ...) match them.
    @task
    def task_1():
        print("task_1")

    @task
    def task_2():
        return "task_2"

    @task
    def task_3():
        return "task_3"

    @task
    def task_4(task_2_output, task_3_output):
        print(task_2_output, task_3_output)

    @task
    def task_5():
        print("task_5")

    start_date = pendulum.parse("20200101")
    with DAG("test_build_task_group_with_task_decorator", start_date=start_date) as dag:
        tsk_1 = task_1()

        with TaskGroup("group234") as group234:
            tsk_2 = task_2()
            tsk_3 = task_3()
            # Passing the outputs of task_2 and task_3 as arguments to task_4.
            tsk_4 = task_4(tsk_2, tsk_3)

        tsk_5 = task_5()

        tsk_1 >> group234 >> tsk_5

    # pylint: disable=no-member
    assert tsk_1.operator in tsk_2.operator.upstream_list
    assert tsk_1.operator in tsk_3.operator.upstream_list
    assert tsk_5.operator in tsk_4.operator.downstream_list
    # pylint: enable=no-member

    expected_nodes = {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {'id': 'group234.task_2'},
                    {'id': 'group234.task_3'},
                    {'id': 'group234.task_4'},
                    {'id': 'group234.upstream_join_id'},
                    {'id': 'group234.downstream_join_id'},
                ],
            },
            {'id': 'task_1'},
            {'id': 'task_5'},
        ],
    }
    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_nodes

    edges = dag_edges(dag)
    expected_edges = [
        ('group234.downstream_join_id', 'task_5'),
        ('group234.task_2', 'group234.task_4'),
        ('group234.task_3', 'group234.task_4'),
        ('group234.task_4', 'group234.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.task_2'),
        ('group234.upstream_join_id', 'group234.task_3'),
        ('task_1', 'group234.upstream_join_id'),
    ]
    assert sorted((edge["source_id"], edge["target_id"]) for edge in edges) == expected_edges