Example 1
0
def test_runtime_metadata_fn():
    """Assets loaded with a runtime_metadata_fn should attach an ``op_name``
    and a ``dbt_model`` metadata entry to every asset materialization."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as f:
        manifest_json = json.load(f)

    def runtime_metadata_fn(context, node_info):
        # One entry for the op that executed and one for the dbt model name.
        return {"op_name": context.solid_def.name, "dbt_model": node_info["name"]}

    assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(assets)

    mock_dbt = MagicMock()
    job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    result = job.execute_in_process()
    assert result.success

    for asset in assets:
        events = result.events_for_node(asset.op.name)
        materializations = [
            event.event_specific_data.materialization
            for event in events
            if event.event_type_value == "ASSET_MATERIALIZATION"
        ]
        assert len(materializations) == 1
        # In this project the asset's op name and its dbt model name coincide.
        assert materializations[0].metadata_entries == [
            EventMetadataEntry.text(asset.op.name, label="op_name"),
            EventMetadataEntry.text(asset.op.name, label="dbt_model"),
        ]
Example 2
0
def test_select_from_manifest(dbt_seed, conn_string, test_project_dir,
                              dbt_config_dir):  # pylint: disable=unused-argument
    """Selecting two model unique_ids from the manifest should yield exactly
    two asset materializations when the job runs against a live dbt CLI."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as f:
        manifest_json = json.load(f)

    # Restrict loading to just these two models from the sample project.
    selected = {
        "model.dagster_dbt_test_project.sort_by_calories",
        "model.dagster_dbt_test_project.least_caloric",
    }
    dbt_assets = load_assets_from_dbt_manifest(
        manifest_json,
        selected_unique_ids=selected,
    )

    dbt_resource = dbt_cli_resource.configured({
        "project_dir": test_project_dir,
        "profiles_dir": dbt_config_dir,
    })
    job = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={"dbt": dbt_resource},
    )
    result = job.execute_in_process()

    assert result.success
    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    # One materialization per selected model.
    assert len(materializations) == 2
Example 3
0
def test_load_from_manifest_json():
    """Loading assets straight from a manifest JSON file should match the
    sample project and execute successfully against a mocked dbt resource."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as f:
        manifest_json = json.load(f)

    assets = load_assets_from_dbt_manifest(manifest_json=manifest_json)
    assert_assets_match_project(assets)

    mock_dbt = MagicMock()
    job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    result = job.execute_in_process()
    assert result.success
Example 4
0
def test_runtime_metadata_fn():
    """With a mocked dbt resource returning canned run results, each of the
    four asset materializations should carry ``op_name``/``dbt_model``
    metadata produced by the runtime_metadata_fn."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as f:
        manifest_json = json.load(f)

    with open(file_relative_path(__file__, "sample_run_results.json"), "r") as f:
        run_results_json = json.load(f)

    def runtime_metadata_fn(context, node_info):
        # One entry for the op that executed and one for the dbt model name.
        return {"op_name": context.solid_def.name, "dbt_model": node_info["name"]}

    dbt_assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(dbt_assets)

    mock_dbt = MagicMock()
    # Feed the canned run results back through the mocked resource.
    mock_dbt.run.return_value = DbtOutput(run_results_json)
    job = build_assets_job(
        "assets_job",
        dbt_assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    result = job.execute_in_process()
    assert result.success

    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 4
    first = materializations[0]
    assert first.metadata_entries == [
        MetadataEntry("op_name", value=dbt_assets[0].op.name),
        MetadataEntry("dbt_model", value=first.asset_key.path[0]),
    ]
Example 5
0
    # ),
    "pyspark":
    pyspark_resource.configured(S3_SPARK_CONF),
}


def asset_metadata(_context, model_info):
    """Build runtime metadata for one dbt model: a five-row markdown sample
    of the table plus its total row count, read from Snowflake."""
    snowflake_config = dict(SHARED_SNOWFLAKE_CONF)
    # Each model may materialize into its own schema.
    snowflake_config["schema"] = model_info["schema"]
    table_name = model_info["name"]
    with connect_snowflake(config=snowflake_config) as con:
        sample_df = pd.read_sql(f"SELECT * FROM {table_name} LIMIT 5", con=con)
        count_row = con.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()

    return {
        "Data sample": EventMetadata.md(sample_df.to_markdown()),
        "Rows": count_row[0],
    }


# this list has one element per dbt model
#
# Fix: the original used json.load(open(...)) which leaks the file handle;
# use a context manager so the manifest file is always closed.
_manifest_path = os.path.join(DBT_PROJECT_DIR, "target", "manifest.json")
with open(_manifest_path, "r") as _manifest_file:
    _manifest_json = json.load(_manifest_file)

assets = load_assets_from_dbt_manifest(
    _manifest_json,
    runtime_metadata_fn=asset_metadata,
    io_manager_key="warehouse_io_manager",
)
# Job that materializes every dbt asset using the dev resource set.
activity_stats = build_assets_job(
    "activity_stats",
    assets,
    [],
    resource_defs=DEV_RESOURCES,
)