Code Example #1
File: dbt.py Project: trevenrawr/dagster
def scope_dbt_cli_profile_modes():
    # start_marker_dbt_cli_profile_modes
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    from dagster import graph

    @graph
    def my_dbt():
        dbt_run_op()

    my_dbt_graph_dev = my_dbt.to_job(
        resource_defs={
            "dbt":
            dbt_cli_resource.configured({
                "project_dir": "path/to/dbt/project",
                "profile": "dev"
            })
        })

    my_dbt_graph_prod = my_dbt.to_job(
        resource_defs={
            "dbt":
            dbt_cli_resource.configured({
                "project_dir": "path/to/dbt/project",
                "profile": "prod"
            })
        })
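
A minimal sketch (not part of the original snippet, and assuming my_dbt_graph_dev and my_dbt_graph_prod are defined at module scope) of collecting both configured jobs in a repository so the dev and prod variants can be launched from Dagit or the CLI:

from dagster import repository


@repository
def my_dbt_repository():
    # Both jobs share the same graph; only the configured dbt resource differs.
    return [my_dbt_graph_dev, my_dbt_graph_prod]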
Code Example #2
def scope_dbt_cli_profile_modes():
    # start_marker_dbt_cli_profile_modes
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    @solid(required_resource_keys={"dbt"})
    def run_all_models(context):
        context.resources.dbt.run()

    @pipeline(mode_defs=[
        ModeDefinition(
            "dev",
            resource_defs={
                "dbt":
                dbt_cli_resource.configured({
                    "project_dir": "path/to/dbt/project",
                    "profile": "dev"
                })
            },
        ),
        ModeDefinition(
            "prod",
            resource_defs={
                "dbt":
                dbt_cli_resource.configured({
                    "project_dir": "path/to/dbt/project",
                    "profile": "prod"
                })
            },
        ),
    ])
    def my_dbt_pipeline():
        run_all_models()
Code Example #3
File: test_resources.py Project: helloworld/dagster
def get_dbt_resource(project_dir, profiles_dir, **kwargs):
    kwargs = kwargs or {}
    return dbt_cli_resource.configured({
        "project_dir": project_dir,
        "profiles_dir": profiles_dir,
        **kwargs,
    })
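
A hypothetical call to the helper above (placeholder paths; assumes from dagster_dbt import dbt_cli_resource at the top of the file). Extra keyword arguments such as target are merged into the configured dict via **kwargs:

dev_dbt_resource = get_dbt_resource(
    project_dir="path/to/dbt/project",
    profiles_dir="path/to/dbt/profiles",
    target="dev",  # forwarded through **kwargs into the resource config
)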
Code Example #4
File: test_ops.py Project: keyz/dagster
def test_seed_op(conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument

    dbt_resource = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    dbt_result = dbt_seed_op(build_op_context(resources={"dbt": dbt_resource}))
    assert len(dbt_result.result["results"]) == 1
Code Example #5
File: test_asset_defs.py Project: trevenrawr/dagster
def test_select_from_project(dbt_seed, conn_string, test_project_dir,
                             dbt_config_dir):  # pylint: disable=unused-argument

    dbt_assets = load_assets_from_dbt_project(
        test_project_dir,
        dbt_config_dir,
        select="sort_by_calories subdir.least_caloric")

    assert dbt_assets[0].op.name == "run_dbt_dagster_dbt_test_project_e4753"

    result = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={
            "dbt":
            dbt_cli_resource.configured({
                "project_dir": test_project_dir,
                "profiles_dir": dbt_config_dir
            })
        },
    ).execute_in_process()

    assert result.success
    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 2
Code Example #6
File: test_asset_defs.py Project: trevenrawr/dagster
def test_node_info_to_asset_key(dbt_seed, conn_string, test_project_dir,
                                dbt_config_dir):  # pylint: disable=unused-argument
    dbt_assets = load_assets_from_dbt_project(
        test_project_dir,
        dbt_config_dir,
        node_info_to_asset_key=lambda node_info: AssetKey(
            ["foo", node_info["name"]]),
    )

    result = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={
            "dbt":
            dbt_cli_resource.configured({
                "project_dir": test_project_dir,
                "profiles_dir": dbt_config_dir
            })
        },
    ).execute_in_process()

    assert result.success
    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 4
    assert materializations[0].asset_key == AssetKey(
        ["foo", "sort_by_calories"])
Code Example #7
File: test_asset_defs.py Project: trevenrawr/dagster
def test_select_from_manifest(dbt_seed, conn_string, test_project_dir,
                              dbt_config_dir):  # pylint: disable=unused-argument

    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as f:
        manifest_json = json.load(f)
    dbt_assets = load_assets_from_dbt_manifest(
        manifest_json,
        selected_unique_ids={
            "model.dagster_dbt_test_project.sort_by_calories",
            "model.dagster_dbt_test_project.least_caloric",
        },
    )

    result = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={
            "dbt":
            dbt_cli_resource.configured({
                "project_dir": test_project_dir,
                "profiles_dir": dbt_config_dir
            })
        },
    ).execute_in_process()

    assert result.success
    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 2
Code Example #8
def scope_dbt_cli_resource_config():
    # start_marker_dbt_cli_resource_config
    from dagster_dbt import dbt_cli_resource

    my_dbt_resource = dbt_cli_resource.configured({
        "project_dir": "path/to/dbt/project",
        "profiles_dir": "path/to/dbt/profiles",
    })
Code Example #9
def scope_dbt_cli_run():
    # start_marker_dbt_cli_run_preconfig
    from dagster import job
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    my_dbt_resource = dbt_cli_resource.configured({"project_dir": "path/to/dbt/project"})

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        dbt_run_op()
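
A minimal sketch (not from the original snippet) of executing this job locally; execute_in_process runs the job synchronously in the current Python process, assuming the project_dir placeholder above points at a real dbt project:

if __name__ == "__main__":
    result = my_dbt_job.execute_in_process()
    assert result.success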
Code Example #10
File: dbt.py Project: trevenrawr/dagster
def scope_dbt_cli_run_after_another_op():
    # start_marker_dbt_cli_run_after_another_op
    from dagster_dbt import dbt_cli_resource, dbt_run_op, dbt_test_op

    from dagster import job

    my_dbt_resource = dbt_cli_resource.configured(
        {"project_dir": "path/to/dbt/project"})

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        dbt_test_op(start_after=dbt_run_op())
Code Example #11
def scope_dbt_cli_config_vars():
    # start_marker_dbt_cli_config_vars
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"vars": {"key": "value"}}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_vars
        pass
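
For reference (an assumption about the underlying CLI call, not part of the original snippet), the vars entry above is serialized to JSON and handed to dbt as its --vars flag:

# A dbt op or solid using this resource would shell out to roughly:
#   dbt run --vars '{"key": "value"}'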
Code Example #12
def scope_dbt_cli_config_exclude_models():
    # start_marker_dbt_cli_config_exclude_models
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"exclude": ["my_dbt_model+", "path.to.models", "tag:nightly"]}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_exclude_models
        pass
Code Example #13
def scope_dbt_cli_config_executable():
    # start_marker_dbt_cli_config_executable
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"dbt_executable": "path/to/dbt/executable"}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_executable
        pass
Code Example #14
def scope_dbt_cli_run_specific_models():
    # start_marker_dbt_cli_run_specific_models_preconfig
    from dagster import job
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    my_dbt_resource = dbt_cli_resource.configured(
        {"project_dir": "path/to/dbt/project", "models": ["tag:staging"]}
    )

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        dbt_run_op()
Code Example #15
File: test_ops.py Project: keyz/dagster
def test_run_op(
    dbt_seed, conn_string, test_project_dir, dbt_config_dir
):  # pylint: disable=unused-argument

    dbt_resource = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    dbt_results = list(dbt_run_op(build_op_context(resources={"dbt": dbt_resource})))

    # includes asset materializations
    assert len(dbt_results) == 5

    assert len(dbt_results[-1].value.result["results"]) == 4
Code Example #16
def scope_dbt_cli_config_profile_and_target():
    PROFILE_NAME, TARGET_NAME = "", ""

    # start_marker_dbt_cli_config_profile_and_target
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"profile": PROFILE_NAME, "target": TARGET_NAME}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_profile_and_target
        pass
Code Example #17
def scope_dbt_cli_config_exclude_models():
    # start_marker_dbt_cli_config_exclude_models
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"exclude": ["my_dbt_model+", "path.to.models", "tag:nightly"]}

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={"dbt": dbt_cli_resource.configured(config)})
    ])
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_exclude_models
        pass
Code Example #18
def scope_dbt_cli_config_vars():
    # start_marker_dbt_cli_config_vars
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"vars": {"key": "value"}}

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={"dbt": dbt_cli_resource.configured(config)})
    ])
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_vars
        pass
Code Example #19
def scope_dbt_cli_config_executable():
    # start_marker_dbt_cli_config_executable
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"dbt_executable": "path/to/dbt/executable"}

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={"dbt": dbt_cli_resource.configured(config)})
    ])
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_executable
        pass
Code Example #20
def scope_dbt_cli_run():
    # start_marker_dbt_cli_run_preconfig
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    my_dbt_resource = dbt_cli_resource.configured(
        {"project_dir": "path/to/dbt/project"})

    @solid(required_resource_keys={"dbt"})
    def run_all_models(context):
        context.resources.dbt.run()

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": my_dbt_resource})])
    def my_dbt_pipeline():
        run_all_models()
Code Example #21
def scope_dbt_cli_config_profile_and_target():
    PROFILE_NAME, TARGET_NAME = "", ""

    # start_marker_dbt_cli_config_profile_and_target
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"profile": PROFILE_NAME, "target": TARGET_NAME}

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={"dbt": dbt_cli_resource.configured(config)})
    ])
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_profile_and_target
        pass
Code Example #22
File: test_ops.py Project: trevenrawr/dagster
def test_run_test_job(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument

    dbt_resource = dbt_cli_resource.configured({
        "project_dir": test_project_dir,
        "profiles_dir": dbt_config_dir
    })

    @job(resource_defs={"dbt": dbt_resource})
    def run_test_job():
        dbt_test_op(start_after=dbt_run_op())

    dbt_result = run_test_job.execute_in_process()

    dbt_run_result = dbt_result.output_for_node("dbt_run_op")
    dbt_test_result = dbt_result.output_for_node("dbt_test_op")

    assert len(dbt_run_result.result["results"]) == 4
    assert len(dbt_test_result.result["results"]) == 15
Code Example #23
def scope_dbt_cli_run_after_another_solid():
    # start_marker_dbt_cli_run_after_another_solid
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource, DbtCliOutput

    my_dbt_resource = dbt_cli_resource.configured(
        {"project_dir": "path/to/dbt/project"})

    @solid(required_resource_keys={"dbt"})
    def run_models(context) -> DbtCliOutput:
        return context.resources.dbt.run()

    @solid(required_resource_keys={"dbt"})
    def test_models(context, run_result: DbtCliOutput):
        context.log.info(f"testing result of `{run_result.command}`!")
        context.resources.dbt.test()

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": my_dbt_resource})])
    def my_dbt_pipeline():
        run_result = run_models()
        test_models(run_result)
Code Example #24
File: dbt_pipeline.py Project: amarrella/dagster
    SHARED_SNOWFLAKE_CONF,
    connect_snowflake,
    snowflake_io_manager_dev,
    snowflake_io_manager_prod,
)

DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt")
DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config"

# We define two sets of resources, one for the prod mode, which writes to production schemas and
# one for dev mode, which writes to alternate schemas
PROD_RESOURCES = {
    "dbt":
    dbt_cli_resource.configured({
        "profiles_dir": DBT_PROFILES_DIR,
        "project_dir": DBT_PROJECT_DIR,
        "target": "prod"
    }),
    "warehouse_io_manager":
    snowflake_io_manager_prod,
    # "parquet_io_manager": parquet_io_manager.configured({"base_path": get_system_temp_directory()}),
    "pyspark":
    pyspark_resource,
}

DEV_RESOURCES = {
    "dbt":
    dbt_cli_resource.configured({
        "profiles-dir": DBT_PROFILES_DIR,
        "project-dir": DBT_PROJECT_DIR,
        "target": "dev"
Code Example #25
)
from ops.gdelt_mining_ops import enhance_articles, materialize_gdelt_mining_asset, materialize_enhanced_articles_asset
from ops.ml_enrichment_ops import classify_protest_relevancy, get_ml_enrichment_files, store_ml_enrichment_files
from resources.novacene_ml_resource import novacene_ml_api_client


# Resources
#################
DBT_PROFILES_DIR = file_relative_path(__file__, "./dw")
DBT_PROJECT_DIR = file_relative_path(__file__, "./dw")

snowflake_env_variables = config_from_files(['environments/snowflake_env_variables.yaml'])
novacene_env_variables = config_from_files(['environments/novacene_env_variables.yaml'])

my_dbt_resource = dbt_cli_resource.configured({
    "profiles_dir": DBT_PROFILES_DIR, 
    "project_dir": DBT_PROJECT_DIR})

my_novacene_client_client = novacene_ml_api_client.configured(novacene_env_variables)


#Jobs
################
@job(
    resource_defs = {
        'snowflake': snowflake_resource
    },
    config = snowflake_env_variables
)
def mine_gdelt_data():
    # Mine data from GDELT
Code Example #26
File: assets.py Project: trevenrawr/dagster
        optimize.curve_fit(f=model_func,
                           xdata=df.order_date.astype(np.int64),
                           ydata=df.num_orders,
                           p0=[10, 100])[0])


@asset(compute_kind="python", io_manager_key="pandas_io_manager")
def predicted_orders(
        daily_order_summary: pd.DataFrame,
        order_forecast_model: Tuple[float, float]) -> pd.DataFrame:
    """Predicted orders for the next 30 days based on the fit paramters"""
    a, b = order_forecast_model
    start_date = daily_order_summary.order_date.max()
    future_dates = pd.date_range(start=start_date,
                                 end=start_date + pd.DateOffset(days=30))
    predicted_data = model_func(x=future_dates.astype(np.int64), a=a, b=b)
    return pd.DataFrame({
        "order_date": future_dates,
        "num_orders": predicted_data
    })


analytics_assets = AssetGroup(
    airbyte_assets + dbt_assets + [order_forecast_model, predicted_orders],
    resource_defs={
        "airbyte": airbyte_resource.configured(AIRBYTE_CONFIG),
        "dbt": dbt_cli_resource.configured(DBT_CONFIG),
        "pandas_io_manager": pandas_io_manager.configured(PANDAS_IO_CONFIG),
    },
).build_job("Assets")
Code Example #27
import pandas as pd
from dagster import MetadataValue, build_assets_job
from dagster.utils import file_relative_path
from dagster_dbt import dbt_cli_resource
from dagster_dbt.asset_defs import load_assets_from_dbt_manifest
from hacker_news_assets.resources import RESOURCES_PROD, RESOURCES_STAGING
from hacker_news_assets.resources.snowflake_io_manager import (
    SHARED_SNOWFLAKE_CONF,
    connect_snowflake,
)

DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt")
DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config"
dbt_staging_resource = dbt_cli_resource.configured({
    "profiles-dir": DBT_PROFILES_DIR,
    "project-dir": DBT_PROJECT_DIR,
    "target": "staging"
})
dbt_prod_resource = dbt_cli_resource.configured({
    "profiles_dir": DBT_PROFILES_DIR,
    "project_dir": DBT_PROJECT_DIR,
    "target": "prod"
})


def asset_metadata(_context, model_info):
    config = dict(SHARED_SNOWFLAKE_CONF)
    config["schema"] = model_info["schema"]
    with connect_snowflake(config=config) as con:
        df = pd.read_sql(f"SELECT * FROM {model_info['name']} LIMIT 5",
                         con=con)