Example #1
def test_dbt_run_with_airflow_connection_and_profile(profiles_file,
                                                     dbt_project_file,
                                                     model_files,
                                                     airflow_conns):
    """Test execution of DbtRunOperator with a connection and a profiles file.

    An Airflow connection target should still be usable even in the presence
    of a profiles file, and vice versa.
    """
    all_targets = airflow_conns + ("test", )

    for target in all_targets:
        op = DbtRunOperator(
            task_id="dbt_task",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            select=[str(m.stem) for m in model_files],
            target=target,
        )

        execution_results = op.execute({})
        run_result = execution_results["results"][0]

        assert run_result["status"] == RunStatus.Success
        assert op.profiles_dir == profiles_file.parent
        assert op.target == target
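The airflow_conns fixture used above is not part of this listing. A minimal sketch of what it could provide, assuming connections registered through Airflow's ORM (the real fixture in the test suite may differ, and the conn_id is hypothetical):

import json

import pytest
from airflow import settings
from airflow.models.connection import Connection


@pytest.fixture
def airflow_conns():
    """Register a hypothetical test connection and yield its conn_id."""
    session = settings.Session()
    conn = Connection(
        conn_id="postgres_test_conn",  # hypothetical conn_id
        conn_type="postgres",
        host="localhost",
        schema="test",
        port=5432,
        extra=json.dumps({"threads": 2}),
    )
    session.add(conn)
    session.commit()

    yield (conn.conn_id,)

    session.delete(conn)
    session.commit()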
Example #2
def test_dbt_run_mocked_all_args():
    """Test mocked dbt run call with all arguments."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/path/to/project/",
        profiles_dir="/path/to/profiles/",
        profile="dbt-profile",
        target="dbt-target",
        vars={"target": "override"},
        log_cache_events=True,
        full_refresh=True,
        models=["/path/to/model.sql", "+/another/model.sql+2"],
        fail_fast=True,
        threads=3,
        exclude=["/path/to/model/to/exclude.sql"],
        selector_name=["a-selector"],
        state="/path/to/state/",
    )
    assert op.command == "run"

    config = op.get_dbt_config()
    assert isinstance(config, RunTaskConfig)
    assert config.project_dir == "/path/to/project/"
    assert config.profiles_dir == "/path/to/profiles/"
    assert config.profile == "dbt-profile"
    assert config.target == "dbt-target"
    assert config.parsed_vars == {"target": "override"}
    assert config.log_cache_events is True
    assert config.full_refresh is True
    assert config.fail_fast is True
    assert config.threads == 3
    assert config.select == ["/path/to/model.sql", "+/another/model.sql+2"]
    assert config.exclude == ["/path/to/model/to/exclude.sql"]
    assert config.selector_name == ["a-selector"]
    assert config.state == Path("/path/to/state/")
Example #3
def test_dbt_run_models_with_project_from_s3(s3_bucket, s3_hook, profiles_file,
                                             dbt_project_file, model_files):
    """Test execution of DbtRunOperator with a project from s3."""
    bucket = s3_hook.get_bucket(s3_bucket)

    with open(dbt_project_file) as pf:
        project_content = pf.read()
    bucket.put_object(Key="project/dbt_project.yml",
                      Body=project_content.encode())

    for model_file in model_files:
        with open(model_file) as mf:
            model_content = mf.read()
            bucket.put_object(Key=f"project/models/{model_file.name}",
                              Body=model_content.encode())

    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=f"s3://{s3_bucket}/project/",
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
    )
    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
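The s3_bucket and s3_hook fixtures are likewise assumed by this test. One way to provide them is with moto's S3 mock; a sketch (the bucket name is hypothetical, and moto >= 5 renames mock_s3 to mock_aws):

import boto3
import pytest
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from moto import mock_s3


@pytest.fixture
def s3_bucket():
    """Create a mocked S3 bucket that lives for the duration of a test."""
    with mock_s3():
        client = boto3.client("s3", region_name="us-east-1")
        client.create_bucket(Bucket="airflow-dbt-test-bucket")
        yield "airflow-dbt-test-bucket"


@pytest.fixture
def s3_hook(s3_bucket):
    """An S3Hook created while the S3 mock from s3_bucket is active."""
    return S3Hook()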
Example #4
def test_dbt_run_fails_with_non_existent_project(profiles_file,
                                                 dbt_project_file):
    """Test dbt run operator raises an exception when failing due to missing project."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/home/fake/project",
        profiles_dir="/home/fake/profiles/",
        full_refresh=True,
    )

    with pytest.raises(AirflowException):
        op.execute({})
Example #5
def test_dbt_run_fails_with_malformed_sql(profiles_file, dbt_project_file,
                                          broken_file):
    """Test dbt run operator raises an exception when failing due to a broken file."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(broken_file.stem)],
        full_refresh=True,
    )

    with pytest.raises(AirflowException):
        op.execute({})
Example #6
def test_dbt_run_models(profiles_file, dbt_project_file, model_files):
    """Test execution of DbtRunOperator with all models."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
    )

    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
Example #7
def test_dbt_run_non_existent_model(profiles_file, dbt_project_file,
                                    model_files):
    """Test execution of DbtRunOperator with a non-existent model."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=["fake"],
        full_refresh=True,
        do_xcom_push=True,
    )

    execution_results = op.execute({})

    assert len(execution_results["results"]) == 0
    assert isinstance(json.dumps(execution_results), str)
Example #8
def test_dbt_run_models_full_refresh(profiles_file, dbt_project_file,
                                     model_files):
    """Test dbt run operator with all model files and full-refresh."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        full_refresh=True,
        do_xcom_push=True,
    )
    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
    assert isinstance(json.dumps(execution_results), str)
Example #9
def test_dbt_run_models_with_airflow_connection(dbt_project_file, model_files,
                                                airflow_conns):
    """Test execution of DbtRunOperator with an Airflow connection target."""
    for conn_id in airflow_conns:
        op = DbtRunOperator(
            task_id="dbt_task",
            project_dir=dbt_project_file.parent,
            models=[str(m.stem) for m in model_files],
            target=conn_id,
        )

        execution_results = op.execute({})
        run_result = execution_results["results"][0]

        assert run_result["status"] == RunStatus.Success
        assert op.profiles_dir is None
        assert op.target == conn_id
Example #10
def test_dbt_run_models_from_s3(s3_bucket, s3_hook, profiles_file,
                                dbt_project_file, model_files):
    """Test execution of DbtRunOperator with all models from s3."""
    bucket = s3_hook.get_bucket(s3_bucket)

    with open(dbt_project_file) as pf:
        project_content = pf.read()
    bucket.put_object(Key="project/dbt_project.yml",
                      Body=project_content.encode())

    with open(profiles_file) as pf:
        profiles_content = pf.read()
    bucket.put_object(Key="project/profiles.yml",
                      Body=profiles_content.encode())

    for model_file in model_files:
        with open(model_file) as mf:
            model_content = mf.read()
            bucket.put_object(Key=f"project/models/{model_file.name}",
                              Body=model_content.encode())

    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=f"s3://{s3_bucket}/project/",
        profiles_dir=f"s3://{s3_bucket}/project/",
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
        do_xcom_push_artifacts=["manifest.json", "run_results.json"],
    )
    ti = FakeTaskInstance()

    execution_results = op.execute({"ti": ti})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
    assert "manifest.json" in ti.xcom
    assert "run_results.json" in ti.xcom
    assert ti.xcom["run_results.json"][0]["results"][0]["status"] == "success"
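FakeTaskInstance is not defined in this listing. Given how the test uses it (artifacts are pushed with xcom_push and read back via ti.xcom[key][0]), a minimal stand-in could look like this sketch:

class FakeTaskInstance:
    """Minimal TaskInstance stand-in that records XCom pushes per key."""

    def __init__(self):
        self.xcom = {}

    def xcom_push(self, key, value, execution_date=None):
        # Each push appends, so ti.xcom[key][0] is the first value pushed.
        self.xcom.setdefault(key, []).append(value)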
Example #11
@pytest.fixture
def basic_dag(
    dbt_project_file,
    profiles_file,
    model_files,
    seed_files,
    singular_tests_files,
    generic_tests_files,
):
    with DAG(
            dag_id="dbt_dag",
            start_date=DATA_INTERVAL_START,
            catchup=False,
            schedule_interval=None,
            tags=["context-manager", "dbt"],
    ) as dag:
        dbt_seed = DbtSeedOperator(
            task_id="dbt_seed",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            do_xcom_push_artifacts=["run_results.json"],
            target="test",
        )

        dbt_run = DbtRunOperator(
            task_id="dbt_run",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            target="test",
            do_xcom_push_artifacts=["run_results.json"],
            full_refresh=True,
        )

        dbt_test = DbtTestOperator(
            task_id="dbt_test",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            do_xcom_push_artifacts=["run_results.json"],
            target="test",
        )

        dbt_seed >> dbt_run >> dbt_test

    yield dag

    session = settings.Session()
    session.query(DagRun).delete()
    session.commit()
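The generator above is presumably registered as a pytest fixture. A sketch of how a test could consume it, running each task in dependency order (the fixture name and DATA_INTERVAL_START constant come from the example; the rest follows Airflow's standard DAG-testing pattern):

from airflow.utils.state import DagRunState, TaskInstanceState
from airflow.utils.types import DagRunType


def test_basic_dag(basic_dag):
    """Run every task of the fixture DAG and assert they all succeed."""
    dagrun = basic_dag.create_dagrun(
        state=DagRunState.RUNNING,
        execution_date=DATA_INTERVAL_START,
        run_type=DagRunType.MANUAL,
    )

    for task_id in ("dbt_seed", "dbt_run", "dbt_test"):
        ti = dagrun.get_task_instance(task_id=task_id)
        ti.task = basic_dag.get_task(task_id=task_id)
        ti.run(ignore_ti_state=True)
        assert ti.state == TaskInstanceState.SUCCESS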
Example #12
def test_dbt_run_uses_correct_argument_according_to_version():
    """Test if dbt run operator sets the proper attribute based on dbt version."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/path/to/project/",
        profiles_dir="/path/to/profiles/",
        profile="dbt-profile",
        target="dbt-target",
        vars={"target": "override"},
        log_cache_events=True,
        full_refresh=True,
        models=["/path/to/model.sql", "+/another/model.sql+2"],
        fail_fast=True,
        threads=3,
        exclude=["/path/to/model/to/exclude.sql"],
        selector_name=["a-selector"],
        state="/path/to/state/",
    )

    assert op.select == ["/path/to/model.sql", "+/another/model.sql+2"]
    assert getattr(op, "models", None) is None
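The test above verifies that the legacy models argument is exposed as select and that no separate models attribute remains. This is not the library's actual implementation, but a sketch of how such an argument rename is commonly handled:

import warnings


class RenamedArgExample:
    """Toy class accepting both the old `models` and the new `select` kwarg."""

    def __init__(self, models=None, select=None):
        if models is not None:
            warnings.warn(
                "`models` is deprecated; use `select` instead",
                DeprecationWarning,
            )
            select = select or models
        # Only `select` is stored; no `models` attribute is ever set,
        # matching the getattr(op, "models", None) is None assertion above.
        self.select = select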
Example #13
def generate_dag():
    # profiles_file and dbt_project_file are assumed to be pytest fixtures
    # available from the enclosing scope of this factory function.
    @task
    def prepare_profiles_dir() -> str:
        return str(profiles_file.parent)

    @task
    def prepare_dbt_project_dir() -> str:
        return str(dbt_project_file.parent)

    profiles_dir = prepare_profiles_dir()
    dbt_project_dir = prepare_dbt_project_dir()

    dbt_seed = DbtSeedOperator(
        task_id="dbt_seed_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        do_xcom_push_artifacts=["run_results.json"],
    )

    dbt_run = DbtRunOperator(
        task_id="dbt_run_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        full_refresh=True,
        do_xcom_push_artifacts=["run_results.json"],
    )

    dbt_test = DbtTestOperator(
        task_id="dbt_test_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        do_xcom_push_artifacts=["run_results.json"],
    )

    dbt_seed >> dbt_run >> dbt_test
        do_xcom_push_artifacts=["sources.json"],
    )

    dbt_seed = DbtSeedOperator(
        task_id="dbt_seed",
        project_dir="/path/to/my/dbt/project/",
        profiles_dir="~/.dbt/",
        target="production",
        profile="my-project",
    )

    dbt_run_incremental = DbtRunOperator(
        task_id="dbt_run_incremental_hourly",
        project_dir="/path/to/my/dbt/project/",
        profiles_dir="~/.dbt/",
        select=["tag:hourly,config.materialized:incremental"],
        exclude=["tag:deprecated"],
        target="production",
        profile="my-project",
        full_refresh=False,
    )

    dbt_run = DbtRunOperator(
        task_id="dbt_run_hourly",
        project_dir="/path/to/my/dbt/project/",
        profiles_dir="~/.dbt/",
        select=["+tag:hourly"],
        exclude=["tag:deprecated,config.materialized:incremental"],
        target="production",
        profile="my-project",
        full_refresh=True,
    )
          f"{longest_execute[1]} seconds!")
    print(f"{longest_compile[0]} took the longest to compile with a time of "
          f"{longest_compile[1]} seconds!")


with DAG(
        dag_id="example_dbt_artifacts",
        schedule_interval="0 0 * * *",
        start_date=days_ago(1),
        catchup=False,
        dagrun_timeout=dt.timedelta(minutes=60),
) as dag:
    dbt_run = DbtRunOperator(
        task_id="dbt_run_daily",
        project_dir="/path/to/my/dbt/project/",
        profiles_dir="~/.dbt/",
        select=["+tag:daily"],
        exclude=["tag:deprecated"],
        target="production",
        profile="my-project",
        full_refresh=True,
        do_xcom_push_artifacts=["manifest.json", "run_results.json"],
    )

    process_artifacts = PythonOperator(
        task_id="process_artifacts",
        python_callable=process_dbt_artifacts,
        provide_context=True,
    )
    dbt_run >> process_artifacts
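The truncated body of process_dbt_artifacts presumably pulls the run_results.json artifact from XCom and compares per-model timings. A sketch of that kind of computation, with key names following dbt's run_results schema (the helper itself is hypothetical):

import datetime as dt


def find_longest_execute(run_results):
    """Return the (model_id, seconds) pair with the longest execute timing."""

    def parse(ts):
        # dbt timestamps end in "Z"; normalize for datetime.fromisoformat.
        return dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))

    longest = (None, 0.0)
    for result in run_results["results"]:
        for timing in result.get("timing", []):
            if timing["name"] != "execute":
                continue
            duration = (
                parse(timing["completed_at"]) - parse(timing["started_at"])
            ).total_seconds()
            if duration > longest[1]:
                longest = (result["unique_id"], duration)
    return longest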
Example #16
        host="localhost",
        login="******",
        port=5432,
        schema="my_dbt_schema",
        password="******",  # pragma: allowlist secret
        # Other dbt parameters can be added as extras
        extra=json.dumps(dict(threads=4, sslmode="require")),
    )

    session.add(my_conn)
    session.commit()

with DAG(
        dag_id="example_airflow_connection",
        schedule_interval="0 * * * *",
        start_date=days_ago(1),
        catchup=False,
        dagrun_timeout=dt.timedelta(minutes=60),
) as dag:
    dbt_run = DbtRunOperator(
        task_id="dbt_run_hourly",
        target="my_db_connection",
        # A profiles file is not needed since we are using an Airflow connection.
        # If one is provided, the Airflow connection will be merged into its
        # existing targets.
        profiles_dir=None,  # Defaults to None so this may be omitted.
        project_dir="/path/to/my/dbt/project/",
        select=["+tag:hourly"],
        exclude=["tag:deprecated"],
    )
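The comments above note that an Airflow connection becomes an extra dbt target. A sketch of the kind of mapping involved, assuming a Postgres-style connection (airflow-dbt-python's real mapping covers more connection types and fields):

def connection_to_dbt_target(conn):
    """Map an Airflow Connection to a dbt target dictionary (sketch only)."""
    target = {
        "type": conn.conn_type,
        "host": conn.host,
        "user": conn.login,
        "password": conn.password,
        "port": conn.port,
        "schema": conn.schema,
    }
    # Extras such as threads or sslmode are merged on top of the base fields.
    target.update(conn.extra_dejson)
    return target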
Example #17
"""Sample basic DAG which dbt runs a project."""
import datetime as dt

from airflow import DAG
from airflow.utils.dates import days_ago
from airflow_dbt_python.operators.dbt import DbtRunOperator

with DAG(
    dag_id="example_basic_dbt",
    schedule_interval=None,
    start_date=days_ago(1),
    catchup=False,
    dagrun_timeout=dt.timedelta(minutes=60),
    default_args={"retries": 2},
) as dag:
    dbt_run = DbtRunOperator(
        task_id="dbt_run_hourly",
        project_dir="/path/to/my/dbt/project/",
        profiles_dir="~/.dbt/",
        select=["+tag:hourly"],
        exclude=["tag:deprecated"],
        target="production",
        profile="my-project",
        full_refresh=False,
    )
Example #18
from airflow_dbt_python.operators.dbt import DbtDocsGenerateOperator, DbtRunOperator

with DAG(
        dag_id="example_basic_dbt_run_with_s3",
        schedule_interval=None,
        start_date=days_ago(1),
        catchup=False,
        dagrun_timeout=dt.timedelta(minutes=60),
) as dag:
    # Project files will be pulled from "s3://my-bucket/dbt/project/key/prefix/",
    # and profiles from "s3://my-bucket/dbt/profiles/key/prefix/".
    dbt_run = DbtRunOperator(
        task_id="dbt_run_hourly",
        project_dir="s3://my-bucket/dbt/project/key/prefix/",
        profiles_dir="s3://my-bucket/dbt/profiles/key/prefix/",
        select=["+tag:hourly"],
        exclude=["tag:deprecated"],
        target="production",
        profile="my-project",
        full_refresh=False,
    )

    # Documentation files (target/manifest.json, target/index.html, and
    # target/catalog.json) will be pushed back to S3 after compilation is done.
    dbt_docs = DbtDocsGenerateOperator(
        task_id="dbt_docs",
        project_dir="s3://my-bucket/dbt/project/key/prefix/",
        profiles_dir="s3://my-bucket/dbt/profiles/key/prefix/",
    )

    dbt_run >> dbt_docs