# NOTE(review): a second test with this exact name appears later in this file;
# under pytest collection the later definition shadows this one — confirm intent.
def test_runtime_metadata_fn():
    """Assets loaded with a ``runtime_metadata_fn`` attach the computed
    metadata entries to every materialization event."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as manifest_file:
        manifest_json = json.load(manifest_file)

    def runtime_metadata_fn(context, node_info):
        # One entry keyed by the op name, one keyed by the dbt model name.
        return {"op_name": context.solid_def.name, "dbt_model": node_info["name"]}

    assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(assets)

    mock_dbt = MagicMock()
    job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    result = job.execute_in_process()
    assert result.success

    for asset in assets:
        materializations = [
            event.event_specific_data.materialization
            for event in result.events_for_node(asset.op.name)
            if event.event_type_value == "ASSET_MATERIALIZATION"
        ]
        # Exactly one materialization per asset, carrying both metadata entries.
        assert len(materializations) == 1
        assert materializations[0].metadata_entries == [
            EventMetadataEntry.text(asset.op.name, label="op_name"),
            EventMetadataEntry.text(asset.op.name, label="dbt_model"),
        ]
def test_select_from_manifest(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """Loading with ``selected_unique_ids`` restricts the job to just those
    models, producing one materialization per selected model."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as manifest_file:
        manifest_json = json.load(manifest_file)

    selected_ids = {
        "model.dagster_dbt_test_project.sort_by_calories",
        "model.dagster_dbt_test_project.least_caloric",
    }
    dbt_assets = load_assets_from_dbt_manifest(manifest_json, selected_unique_ids=selected_ids)

    dbt_resource = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    job = build_assets_job("test_job", dbt_assets, resource_defs={"dbt": dbt_resource})
    result = job.execute_in_process()
    assert result.success

    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    # Two models selected -> two materializations.
    assert len(materializations) == 2
def test_load_from_manifest_json():
    """Assets loaded from the sample manifest match the project, and a job
    built from them executes successfully against a mocked dbt resource."""
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as manifest_file:
        manifest_json = json.load(manifest_file)

    assets = load_assets_from_dbt_manifest(manifest_json=manifest_json)
    assert_assets_match_project(assets)

    mock_dbt = MagicMock()
    job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    assert job.execute_in_process().success
def test_runtime_metadata_fn():
    """With the mocked dbt resource returning the sample run results, the
    ``runtime_metadata_fn`` entries appear on each materialization."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as manifest_file:
        manifest_json = json.load(manifest_file)
    with open(file_relative_path(__file__, "sample_run_results.json"), "r") as results_file:
        run_results_json = json.load(results_file)

    def runtime_metadata_fn(context, node_info):
        # One entry keyed by the op name, one keyed by the dbt model name.
        return {"op_name": context.solid_def.name, "dbt_model": node_info["name"]}

    dbt_assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(dbt_assets)

    mock_dbt = MagicMock()
    # The mocked run must return real run results so materializations are emitted.
    mock_dbt.run.return_value = DbtOutput(run_results_json)
    job = build_assets_job(
        "assets_job",
        dbt_assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(mock_dbt)},
    )
    result = job.execute_in_process()
    assert result.success

    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    # One materialization per model in the sample run results.
    assert len(materializations) == 4
    assert materializations[0].metadata_entries == [
        MetadataEntry("op_name", value=dbt_assets[0].op.name),
        MetadataEntry("dbt_model", value=materializations[0].asset_key.path[0]),
    ]
# ), "pyspark": pyspark_resource.configured(S3_SPARK_CONF), } def asset_metadata(_context, model_info): config = dict(SHARED_SNOWFLAKE_CONF) config["schema"] = model_info["schema"] with connect_snowflake(config=config) as con: df = pd.read_sql(f"SELECT * FROM {model_info['name']} LIMIT 5", con=con) num_rows = con.execute( f"SELECT COUNT(*) FROM {model_info['name']}").fetchone() return { "Data sample": EventMetadata.md(df.to_markdown()), "Rows": num_rows[0] } # this list has one element per dbt model assets = load_assets_from_dbt_manifest( json.load(open(os.path.join(DBT_PROJECT_DIR, "target", "manifest.json"))), runtime_metadata_fn=asset_metadata, io_manager_key="warehouse_io_manager", ) activity_stats = build_assets_job("activity_stats", assets, [], resource_defs=DEV_RESOURCES)