예제 #1
0
def test_explicit_failure():
    """Check that user-supplied failure data survives a failed run.

    Executes the reconstructable ``_define_failure_job`` with errors
    suppressed, then verifies that the failure recorded for ``retry_op``
    carries the expected description and metadata entries.
    """
    with tempfile.TemporaryDirectory() as scratch, instance_for_test() as instance:
        # Both resources are pointed at the same temporary directory.
        resource_config = {
            "step_launcher": {"config": {"scratch_dir": scratch}},
            "io_manager": {"config": {"base_dir": scratch}},
        }
        result = execute_pipeline(
            pipeline=reconstructable(_define_failure_job),
            run_config={"resources": resource_config},
            instance=instance,
            raise_on_error=False,
        )

        failure = result.result_for_solid("retry_op").failure_data
        assert failure.user_failure_data.description == "some failure description"
        assert failure.user_failure_data.metadata_entries == [
            MetadataEntry.float(label="foo", value=1.23)
        ]
예제 #2
0
파일: utils.py 프로젝트: helloworld/dagster
def _timing_to_metadata(timings: List[Dict[str, Any]]) -> List[MetadataEntry]:
    """Convert timing records into metadata entries.

    Only records whose ``"name"`` is ``"execute"`` or ``"compile"`` are
    reported; any other record is skipped. Each reported record produces
    three entries: its start timestamp, its completion timestamp (both
    rendered as ISO-8601 strings truncated to whole seconds) and its duration
    in seconds.

    Args:
        timings: Records with ``"name"``, ``"started_at"`` and
            ``"completed_at"`` keys; the two timestamps must be ISO-8601
            strings parseable by ``dateutil.parser.isoparse``.

    Returns:
        A flat list of metadata entries, three per reported record, in input
        order.
    """
    metadata = []
    for timing in timings:
        if timing["name"] == "execute":
            desc = "Execution"
        elif timing["name"] == "compile":
            desc = "Compilation"
        else:
            continue

        started_at = dateutil.parser.isoparse(timing["started_at"])
        completed_at = dateutil.parser.isoparse(timing["completed_at"])
        duration = completed_at - started_at
        metadata.extend(
            [
                MetadataEntry.text(
                    text=started_at.isoformat(timespec="seconds"), label=f"{desc} Started At"
                ),
                # Report the completion timestamp here; this entry previously
                # repeated the start timestamp under the "Completed At" label.
                MetadataEntry.text(
                    text=completed_at.isoformat(timespec="seconds"),
                    label=f"{desc} Completed At",
                ),
                MetadataEntry.float(value=duration.total_seconds(), label=f"{desc} Duration"),
            ]
        )
    return metadata
예제 #3
0
    def handle_output(self, context, obj):
        """Write ``obj`` to CSV and yield summary metadata about it.

        The file is written to ``my_base_dir/<step_key>/<name>``, using the
        step key and name taken from ``context``. Afterwards two metadata
        entries are yielded: the row count and the mean of ``some_column``.

        NOTE(review): ``obj`` is presumably a pandas DataFrame (it is used via
        ``to_csv``, ``shape`` and column indexing) -- confirm against callers.
        """
        destination = os.path.join("my_base_dir", context.step_key, context.name)
        obj.to_csv(destination)

        row_count = obj.shape[0]
        yield MetadataEntry.int(row_count, label="number of rows")

        column_mean = obj["some_column"].mean()
        yield MetadataEntry.float(column_mean, "some_column mean")
예제 #4
0
파일: setup.py 프로젝트: helloworld/dagster
 def materialize(_):
     """Yield a materialization carrying one entry of every metadata type.

     Emits a single ``AssetMaterialization`` for the ``all_types`` asset key,
     followed by an ``Output`` whose value is ``None``. The single positional
     argument is ignored.
     """
     # Scalar and reference-style entries, in display order.
     entries = [
         MetadataEntry.text("text is cool", "text"),
         MetadataEntry.url("https://bigty.pe/neato", "url"),
         MetadataEntry.fspath("/tmp/awesome", "path"),
         MetadataEntry.json({"is_dope": True}, "json"),
         MetadataEntry.python_artifact(MetadataEntry, "python class"),
         MetadataEntry.python_artifact(file_relative_path, "python function"),
         MetadataEntry.float(1.2, "float"),
         MetadataEntry.int(1, "int"),
         MetadataEntry.float(float("nan"), "float NaN"),
         MetadataEntry.int(LONG_INT, "long int"),
         MetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         MetadataEntry.asset(AssetKey("my_asset"), "my asset"),
     ]

     # Tabular data entry.
     table_rows = [TableRecord(foo=1, bar=2), TableRecord(foo=3, bar=4)]
     entries.append(MetadataEntry.table(label="table", records=table_rows))

     # Table schema entry: a unique integer column plus a plain string column.
     foo_column = TableColumn(
         name="foo",
         type="integer",
         constraints=TableColumnConstraints(unique=True),
     )
     bar_column = TableColumn(name="bar", type="string")
     table_schema = TableSchema(
         columns=[foo_column, bar_column],
         constraints=TableConstraints(other=["some constraint"]),
     )
     entries.append(MetadataEntry.table_schema(label="table_schema", schema=table_schema))

     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
예제 #5
0
파일: setup.py 프로젝트: helloworld/dagster
 def backcompat_materialize(_):
     """Yield a ``Materialization`` carrying a range of metadata entry types.

     Emits a single ``Materialization`` for the ``all_types`` asset key,
     followed by an ``Output`` whose value is ``None``. The single positional
     argument is ignored.
     """
     entries = [
         MetadataEntry.text("text is cool", "text"),
         MetadataEntry.url("https://bigty.pe/neato", "url"),
         MetadataEntry.fspath("/tmp/awesome", "path"),
         MetadataEntry.json({"is_dope": True}, "json"),
         MetadataEntry.python_artifact(MetadataEntry, "python class"),
         MetadataEntry.python_artifact(file_relative_path, "python function"),
         MetadataEntry.float(1.2, "float"),
         MetadataEntry.int(1, "int"),
         MetadataEntry.float(float("nan"), "float NaN"),
         MetadataEntry.int(LONG_INT, "long int"),
         MetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         MetadataEntry.asset(AssetKey("my_asset"), "my asset"),
     ]

     yield Materialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
예제 #6
0
파일: utils.py 프로젝트: helloworld/dagster
def result_to_materialization(
    result: Dict[str, Any],
    asset_key_prefix: Optional[List[str]] = None,
    docs_url: Optional[str] = None,
) -> Optional[AssetMaterialization]:
    """
    This is a hacky solution that attempts to consolidate parsing many of the potential formats
    that dbt can provide its results in. This is known to work for CLI Outputs for dbt versions 0.18+,
    as well as RPC responses for a similar time period, but as the RPC response schema is not documented
    nor enforced, this can become out of date easily.

    Args:
        result: A single result record. Success is read from the "fail"/"skip"/"error" fields
            when "fail" is present, otherwise from "status". The node's unique id is read from
            result["node"]["unique_id"] when a "node" block is present, otherwise from
            result["unique_id"].
        asset_key_prefix: Optional path components prepended to the asset key.
        docs_url: Optional base URL; when truthy, a link to the model's docs page is added as the
            first metadata entry.

    Returns:
        An AssetMaterialization for a successful model result, or None when the result was not
        successful or its unique id does not start with "model".
    """

    asset_key_prefix = check.opt_list_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    # status comes from set of fields rather than "status"
    if "fail" in result:
        success = not result.get("fail") and not result.get("skip") and not result.get("error")
    else:
        success = result["status"] == "success"

    if not success:
        return None

    # working with a response that contains the node block (RPC and CLI 0.18.x)
    has_node = "node" in result
    unique_id = result["node"]["unique_id"] if has_node else result["unique_id"]
    id_prefix = unique_id.split(".")

    # only generate materializations for models; decide this before building any metadata so
    # that no work is done (and then thrown away) for other node types
    if id_prefix[0] != "model":
        return None

    # all versions represent timing the same way
    metadata = [
        MetadataEntry.float(value=result["execution_time"], label="Execution Time (seconds)")
    ] + _timing_to_metadata(result["timing"])

    if has_node:
        metadata += _node_result_to_metadata(result["node"])

    if docs_url:
        metadata = [
            MetadataEntry.url(url=f"{docs_url}#!/model/{unique_id}", label="docs_url")
        ] + metadata

    return AssetMaterialization(
        description=f"dbt node: {unique_id}",
        metadata_entries=metadata,
        asset_key=asset_key_prefix + id_prefix,
    )
예제 #7
0
 def _get_metadata(self, result: Dict[str, Any]) -> List[MetadataEntry]:
     """Return a one-element list holding the result's execution time entry.

     The value is read from ``result["execution_time"]`` and labelled
     "Execution Time (seconds)".
     """
     elapsed = result["execution_time"]
     execution_time_entry = MetadataEntry.float(
         value=elapsed, label="Execution Time (seconds)"
     )
     return [execution_time_entry]