Example #1
0
def datahub_lineage_backend_taskflow_demo():
    # Demo body: declares a single task annotated with lineage metadata
    # (inlets/outlets) and invokes it once.

    # Datasets declared as the task's lineage inlets.
    upstream_tables = [
        Dataset("snowflake", "mydb.schema.tableA"),
        Dataset("snowflake", "mydb.schema.tableB"),
    ]
    # Dataset declared as the task's lineage outlet.
    downstream_tables = [Dataset("snowflake", "mydb.schema.tableC")]

    @task(
        inlets={"datasets": upstream_tables},
        outlets={"datasets": downstream_tables},
    )
    def run_data_task():
        # Placeholder: a real pipeline would invoke its data tooling here.
        pass

    run_data_task()
Example #2
0
    # in the Airflow UI, where it will be even more clear if something
    # is wrong.
    hook.get_connection_form_widgets()
    hook.get_ui_field_behaviour()


@pytest.mark.parametrize(
    ["inlets", "outlets"],
    [
        (
            # Airflow 1.10.x uses a dictionary structure for inlets and outlets.
            # We want the lineage backend to support this structure for backwards
            # compatibility reasons, so this test is not conditional.
            {
                "datasets":
                [Dataset("snowflake", "mydb.schema.tableConsumed")]
            },
            {
                "datasets":
                [Dataset("snowflake", "mydb.schema.tableProduced")]
            },
        ),
        pytest.param(
            # Airflow 2.x also supports a flattened list for inlets and outlets.
            # We want to test this capability.
            [Dataset("snowflake", "mydb.schema.tableConsumed")],
            [Dataset("snowflake", "mydb.schema.tableProduced")],
            marks=pytest.mark.skipif(
                airflow.version.version.startswith("1"),
                reason="list-style lineage is only supported in Airflow 2.x",
            ),
Example #3
0
def test_hook_airflow_ui(hook):
    # Smoke test: the UI-facing hook methods only need to run without
    # raising. Their output also shows up in the Airflow UI, where any
    # problem will be even more obvious.
    for ui_method_name in (
        "get_connection_form_widgets",
        "get_ui_field_behaviour",
    ):
        getattr(hook, ui_method_name)()


@pytest.mark.parametrize(
    ["inlets", "outlets"],
    [
        pytest.param(
            # Airflow 1.10.x uses a dictionary structure for inlets and outlets.
            # We want the lineage backend to support this structure for backwards
            # compatibility reasons, so this test is not conditional.
            {"datasets": [Dataset("snowflake", "mydb.schema.tableConsumed")]},
            {"datasets": [Dataset("snowflake", "mydb.schema.tableProduced")]},
            id="airflow-1-10-lineage-syntax",
        ),
        pytest.param(
            # Airflow 2.x also supports a flattened list for inlets and outlets.
            # We want to test this capability.
            [Dataset("snowflake", "mydb.schema.tableConsumed")],
            [Dataset("snowflake", "mydb.schema.tableProduced")],
            marks=pytest.mark.skipif(
                airflow.version.version.startswith("1"),
                reason="list-style lineage is only supported in Airflow 2.x",
            ),
            id="airflow-2-lineage-syntax",
        ),
    ],
Example #4
0
# Shared arguments handed to the DAG through its `default_args` parameter.
default_args = dict(
    owner="airflow",
    depends_on_past=False,
    email=["*****@*****.**"],
    email_on_failure=False,
    execution_timeout=timedelta(minutes=5),
)


# Example DAG: one Bash task whose inlets/outlets carry the lineage datasets.
with DAG(
    dag_id="datahub_lineage_backend_demo",
    description="An example DAG demonstrating the usage of DataHub's Airflow lineage backend.",
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    start_date=days_ago(2),
    catchup=False,
    tags=["example_tag"],
) as dag:
    # Datasets declared as the task's lineage inlets.
    lineage_inlets = {
        "datasets": [
            Dataset("snowflake", "mydb.schema.tableA"),
            Dataset("snowflake", "mydb.schema.tableB"),
        ],
    }
    # Dataset declared as the task's lineage outlet.
    lineage_outlets = {"datasets": [Dataset("snowflake", "mydb.schema.tableC")]}

    task1 = BashOperator(
        task_id="run_data_task",
        bash_command="echo 'This is where you might run your data tooling.'",
        inlets=lineage_inlets,
        outlets=lineage_outlets,
        dag=dag,
    )