def test_explicit_failure():
    with tempfile.TemporaryDirectory() as tmpdir:
        run_config = {
            "resources": {
                "step_launcher": {"config": {"scratch_dir": tmpdir}},
                "io_manager": {"config": {"base_dir": tmpdir}},
            }
        }
        with instance_for_test() as instance:
            run = execute_pipeline(
                pipeline=reconstructable(_define_failure_job),
                run_config=run_config,
                instance=instance,
                raise_on_error=False,
            )
            fd = run.result_for_solid("retry_op").failure_data
            assert fd.user_failure_data.description == "some failure description"
            assert fd.user_failure_data.metadata_entries == [
                MetadataEntry.float(label="foo", value=1.23)
            ]
def _timing_to_metadata(timings: List[Dict[str, Any]]) -> List[MetadataEntry]:
    metadata = []
    for timing in timings:
        if timing["name"] == "execute":
            desc = "Execution"
        elif timing["name"] == "compile":
            desc = "Compilation"
        else:
            continue

        started_at = dateutil.parser.isoparse(timing["started_at"])
        completed_at = dateutil.parser.isoparse(timing["completed_at"])
        duration = completed_at - started_at
        metadata.extend(
            [
                MetadataEntry.text(
                    text=started_at.isoformat(timespec="seconds"), label=f"{desc} Started At"
                ),
                MetadataEntry.text(
                    text=completed_at.isoformat(timespec="seconds"), label=f"{desc} Completed At"
                ),
                MetadataEntry.float(value=duration.total_seconds(), label=f"{desc} Duration"),
            ]
        )
    return metadata
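# A minimal sketch of the input shape _timing_to_metadata expects, inferred from
# the parsing logic above; the timing names and timestamps are hypothetical
# sample values, not captured from a real dbt run.
_sample_timings = [
    {
        "name": "compile",
        "started_at": "2022-01-01T00:00:00Z",
        "completed_at": "2022-01-01T00:00:01Z",
    },
    {
        "name": "execute",
        "started_at": "2022-01-01T00:00:01Z",
        "completed_at": "2022-01-01T00:00:05Z",
    },
]
# _timing_to_metadata(_sample_timings) would yield six entries: "Compilation
# Started At" / "Completed At" / "Duration", then the same three for "Execution".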
def handle_output(self, context, obj):
    file_path = os.path.join("my_base_dir", context.step_key, context.name)
    obj.to_csv(file_path)

    yield MetadataEntry.int(obj.shape[0], label="number of rows")
    yield MetadataEntry.float(obj["some_column"].mean(), "some_column mean")
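# For context, a minimal sketch of the IOManager that a handle_output like the
# one above could belong to; the class name, load_input body, and io_manager
# factory are hypothetical additions for illustration, assuming the dagster
# IOManager API.
import os

import pandas as pd
from dagster import IOManager, MetadataEntry, io_manager


class MyCsvIOManager(IOManager):  # hypothetical name
    def handle_output(self, context, obj: pd.DataFrame):
        file_path = os.path.join("my_base_dir", context.step_key, context.name)
        obj.to_csv(file_path)

        # Entries yielded here are attached to the output event in the event log.
        yield MetadataEntry.int(obj.shape[0], label="number of rows")
        yield MetadataEntry.float(obj["some_column"].mean(), "some_column mean")

    def load_input(self, context):
        file_path = os.path.join(
            "my_base_dir", context.upstream_output.step_key, context.upstream_output.name
        )
        return pd.read_csv(file_path)


@io_manager
def my_csv_io_manager(_):
    return MyCsvIOManager()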
def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            MetadataEntry.text("text is cool", "text"),
            MetadataEntry.url("https://bigty.pe/neato", "url"),
            MetadataEntry.fspath("/tmp/awesome", "path"),
            MetadataEntry.json({"is_dope": True}, "json"),
            MetadataEntry.python_artifact(MetadataEntry, "python class"),
            MetadataEntry.python_artifact(file_relative_path, "python function"),
            MetadataEntry.float(1.2, "float"),
            MetadataEntry.int(1, "int"),
            MetadataEntry.float(float("nan"), "float NaN"),
            MetadataEntry.int(LONG_INT, "long int"),
            MetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
            MetadataEntry.asset(AssetKey("my_asset"), "my asset"),
            MetadataEntry.table(
                label="table",
                records=[
                    TableRecord(foo=1, bar=2),
                    TableRecord(foo=3, bar=4),
                ],
            ),
            MetadataEntry.table_schema(
                label="table_schema",
                schema=TableSchema(
                    columns=[
                        TableColumn(
                            name="foo",
                            type="integer",
                            constraints=TableColumnConstraints(unique=True),
                        ),
                        TableColumn(name="bar", type="string"),
                    ],
                    constraints=TableConstraints(other=["some constraint"]),
                ),
            ),
        ],
    )
    yield Output(None)
def backcompat_materialize(_):
    yield Materialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            MetadataEntry.text("text is cool", "text"),
            MetadataEntry.url("https://bigty.pe/neato", "url"),
            MetadataEntry.fspath("/tmp/awesome", "path"),
            MetadataEntry.json({"is_dope": True}, "json"),
            MetadataEntry.python_artifact(MetadataEntry, "python class"),
            MetadataEntry.python_artifact(file_relative_path, "python function"),
            MetadataEntry.float(1.2, "float"),
            MetadataEntry.int(1, "int"),
            MetadataEntry.float(float("nan"), "float NaN"),
            MetadataEntry.int(LONG_INT, "long int"),
            MetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
            MetadataEntry.asset(AssetKey("my_asset"), "my asset"),
        ],
    )
    yield Output(None)
def result_to_materialization(
    result: Dict[str, Any], asset_key_prefix: List[str] = None, docs_url: str = None
) -> Optional[AssetMaterialization]:
    """
    This is a hacky solution that attempts to consolidate parsing many of the potential formats
    that dbt can provide its results in. This is known to work for CLI output for dbt versions
    0.18+, as well as RPC responses for a similar time period, but as the RPC response schema is
    neither documented nor enforced, it can easily become out of date.
    """
    asset_key_prefix = check.opt_list_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    # status comes from a set of fields rather than a single "status" field
    if "fail" in result:
        success = not result.get("fail") and not result.get("skip") and not result.get("error")
    else:
        success = result["status"] == "success"

    if not success:
        return None

    # all versions represent timing the same way
    metadata = [
        MetadataEntry.float(value=result["execution_time"], label="Execution Time (seconds)")
    ] + _timing_to_metadata(result["timing"])

    # working with a response that contains the node block (RPC and CLI 0.18.x)
    if "node" in result:
        unique_id = result["node"]["unique_id"]
        metadata += _node_result_to_metadata(result["node"])
    else:
        unique_id = result["unique_id"]

    id_prefix = unique_id.split(".")

    # only generate materializations for models
    if id_prefix[0] != "model":
        return None

    if docs_url:
        metadata = [
            MetadataEntry.url(url=f"{docs_url}#!/model/{unique_id}", label="docs_url")
        ] + metadata

    return AssetMaterialization(
        description=f"dbt node: {unique_id}",
        metadata_entries=metadata,
        asset_key=asset_key_prefix + id_prefix,
    )
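# A hedged sketch of a CLI-style result dict that result_to_materialization
# would accept; the field values are illustrative assumptions based on the
# parsing logic above, not captured from a real dbt invocation.
_sample_result = {
    "status": "success",
    "execution_time": 1.23,
    "timing": [
        {
            "name": "execute",
            "started_at": "2022-01-01T00:00:00Z",
            "completed_at": "2022-01-01T00:00:01Z",
        }
    ],
    "unique_id": "model.my_dbt_project.my_model",  # hypothetical model id
}
# result_to_materialization(_sample_result, asset_key_prefix=["dbt"]) would
# return an AssetMaterialization with asset key
# ["dbt", "model", "my_dbt_project", "my_model"] and execution-time plus
# timing metadata entries.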
def _get_metadata(self, result: Dict[str, Any]) -> List[MetadataEntry]:
    return [
        MetadataEntry.float(value=result["execution_time"], label="Execution Time (seconds)")
    ]