def datahub_lineage_backend_taskflow_demo():
    # Example flow body: defines one task annotated with lineage metadata
    # (two Snowflake tables as inlets, one as outlet) and invokes it once.
    # Documented with comments rather than a docstring so that the function's
    # __doc__ stays unset, exactly as in the original.

    # Tables declared as the task's inlets.
    upstream_tables = [
        Dataset("snowflake", "mydb.schema.tableA"),
        Dataset("snowflake", "mydb.schema.tableB"),
    ]
    # Table declared as the task's outlet.
    downstream_tables = [Dataset("snowflake", "mydb.schema.tableC")]

    @task(
        inlets={"datasets": upstream_tables},
        outlets={"datasets": downstream_tables},
    )
    def run_data_task():
        # This is where you might run your data tooling.
        pass

    # Call the decorated task so it is created as part of this function's run.
    run_data_task()
# in the Airflow UI, where it will be even more clear if something # is wrong. hook.get_connection_form_widgets() hook.get_ui_field_behaviour() @pytest.mark.parametrize( ["inlets", "outlets"], [ ( # Airflow 1.10.x uses a dictionary structure for inlets and outlets. # We want the lineage backend to support this structure for backwards # compatibility reasons, so this test is not conditional. { "datasets": [Dataset("snowflake", "mydb.schema.tableConsumed")] }, { "datasets": [Dataset("snowflake", "mydb.schema.tableProduced")] }, ), pytest.param( # Airflow 2.x also supports a flattened list for inlets and outlets. # We want to test this capability. [Dataset("snowflake", "mydb.schema.tableConsumed")], [Dataset("snowflake", "mydb.schema.tableProduced")], marks=pytest.mark.skipif( airflow.version.version.startswith("1"), reason="list-style lineage is only supported in Airflow 2.x", ),
def test_hook_airflow_ui(hook):
    """Smoke-test the hook's Airflow UI helper methods.

    Calls both methods and makes no assertions about what they return; the
    test passes as long as neither call raises.

    Args:
        hook: Object exposing ``get_connection_form_widgets`` and
            ``get_ui_field_behaviour`` (presumably supplied by a pytest
            fixture defined outside this view -- confirm).
    """
    # Simply ensure that these run without issue. These will also show up
    # in the Airflow UI, where it will be even more clear if something
    # is wrong.
    hook.get_connection_form_widgets()
    hook.get_ui_field_behaviour()


@pytest.mark.parametrize(
    ["inlets", "outlets"],
    [
        pytest.param(
            # Airflow 1.10.x uses a dictionary structure for inlets and outlets.
            # We want the lineage backend to support this structure for backwards
            # compatibility reasons, so this test is not conditional.
            {"datasets": [Dataset("snowflake", "mydb.schema.tableConsumed")]},
            {"datasets": [Dataset("snowflake", "mydb.schema.tableProduced")]},
            id="airflow-1-10-lineage-syntax",
        ),
        pytest.param(
            # Airflow 2.x also supports a flattened list for inlets and outlets.
            # We want to test this capability.
            [Dataset("snowflake", "mydb.schema.tableConsumed")],
            [Dataset("snowflake", "mydb.schema.tableProduced")],
            marks=pytest.mark.skipif(
                airflow.version.version.startswith("1"),
                reason="list-style lineage is only supported in Airflow 2.x",
            ),
            id="airflow-2-lineage-syntax",
        ),
    ],
# Arguments handed to the DAG below via its ``default_args`` parameter.
default_args = dict(
    owner="airflow",
    depends_on_past=False,
    email=["*****@*****.**"],
    email_on_failure=False,
    execution_timeout=timedelta(minutes=5),
)

# Example DAG containing a single Bash task that carries lineage metadata.
with DAG(
    "datahub_lineage_backend_demo",
    default_args=default_args,
    description="An example DAG demonstrating the usage of DataHub's Airflow lineage backend.",
    schedule_interval=timedelta(days=1),
    start_date=days_ago(2),
    tags=["example_tag"],
    catchup=False,
) as dag:
    # The task only echoes a placeholder message; its inlets name tableA and
    # tableB, and its outlets name tableC. ``dag`` is also passed explicitly
    # even though the operator is created inside the ``with DAG`` block.
    task1 = BashOperator(
        task_id="run_data_task",
        bash_command="echo 'This is where you might run your data tooling.'",
        inlets=dict(
            datasets=[
                Dataset("snowflake", "mydb.schema.tableA"),
                Dataset("snowflake", "mydb.schema.tableB"),
            ],
        ),
        outlets=dict(datasets=[Dataset("snowflake", "mydb.schema.tableC")]),
        dag=dag,
    )