Code Example #1
def dbt_cli_snapshot_freshness(context) -> Dict:
    """This solid executes ``dbt source snapshot-freshness`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("source", "snapshot-freshness"),
        flags_dict=passthrough_flags_only(context.solid_config,
                                          ("select", "output", "threads")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_source_snapshot-freshness_cli_output",
        description="Output from the CLI execution of `dbt source snapshot-freshness`.",
        metadata_entries=[
            EventMetadataEntry.json(cli_output, label="CLI Output")
        ],
    )

    yield Output(cli_output)
Code Example #2
def dbt_cli_snapshot(context) -> Dict:
    """This solid executes ``dbt snapshot`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("snapshot", ),
        flags_dict=passthrough_flags_only(context.solid_config,
                                          ("threads", "models", "exclude")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    if context.solid_config["yield_materializations"]:
        yield AssetMaterialization(
            asset_key="dbt_snapshot_cli_output",
            description="Output from the CLI execution of `dbt snapshot`.",
            metadata_entries=[
                EventMetadataEntry.json(cli_output, label="CLI Output")
            ],
        )

    yield Output(cli_output)
Code Example #3
def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
            EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
            EventMetadataEntry.python_artifact(file_relative_path, "python function"),
            EventMetadataEntry.float(1.2, "float"),
            EventMetadataEntry.int(1, "int"),
            EventMetadataEntry.float(float("nan"), "float NaN"),
            EventMetadataEntry.int(LONG_INT, "long int"),
            EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
            EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
        ],
    )
    yield Output(None)
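Note: Example #3 uses the older `metadata_entries=[EventMetadataEntry...]` API. Later Dagster releases express the same information through a plain `metadata` dict of `MetadataValue` helpers (the form used in Examples #6 and #11 below). A minimal sketch of the dict form, assuming a Dagster version that exports `MetadataValue` and the `@op` decorator; the asset key and op name are illustrative, not taken from the listing:

from dagster import AssetMaterialization, MetadataValue, Output, op


@op
def materialize_dict_form():
    # Same kinds of metadata as Example #3, expressed with MetadataValue
    # helpers instead of EventMetadataEntry (illustrative sketch).
    yield AssetMaterialization(
        asset_key="all_types_dict_form",
        description="a materialization using the metadata dict API",
        metadata={
            "text": MetadataValue.text("text is cool"),
            "url": MetadataValue.url("https://bigty.pe/neato"),
            "path": MetadataValue.path("/tmp/awesome"),
            "json": MetadataValue.json({"is_dope": True}),
            "float": MetadataValue.float(1.2),
            "int": MetadataValue.int(1),
        },
    )
    yield Output(None)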
Code Example #4
def test_access_partition_keys_from_context_only_one_asset_partitioned():
    upstream_partitions_def = StaticPartitionsDefinition(["a", "b", "c"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            if context.op_def.name == "upstream_asset":
                assert context.asset_partition_key == "b"
            elif context.op_def.name == "downstream_asset":
                assert not context.has_asset_partitions
                with pytest.raises(Exception):  # TODO: better error message
                    assert context.asset_partition_key_range
            else:
                assert False

        def load_input(self, context):
            assert not context.has_asset_partitions

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "b"

    @asset
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="b")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(asset_key=AssetKey(["upstream_asset"]),
                             partition="b")
    ]
Code Example #5
def dbt_cli_test(context) -> DbtCliOutput:
    """This solid executes ``dbt test`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("test",),
        flags_dict=passthrough_flags_only(
            context.solid_config, ("data", "schema", "fail-fast", "threads", "models", "exclude")
        ),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )
    run_results = parse_run_results(context.solid_config["project-dir"])
    cli_output = {**run_results, **cli_output}

    if context.solid_config["yield_materializations"]:
        yield AssetMaterialization(
            asset_key="dbt_test_cli_output",
            description="Output from the CLI execution of `dbt test`.",
            metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
        )

    yield Output(DbtCliOutput.from_dict(cli_output), output_name="dbt_output")
Code Example #6
def _table_data_to_materialization(
    fivetran_output: FivetranOutput,
    asset_key_prefix: List[str],
    schema_name: str,
    table_data: Dict[str, Any],
) -> AssetMaterialization:
    table_name = table_data["name_in_destination"]
    asset_key = asset_key_prefix + [schema_name, table_name]
    if not table_data["enabled"]:
        return None
    metadata = {
        "connector_url": MetadataValue.url(
            get_fivetran_connector_url(fivetran_output.connector_details)
        )
    }
    if table_data.get("columns"):
        metadata["column_info"] = MetadataValue.json(table_data.get("columns"))
    return AssetMaterialization(
        asset_key=asset_key,
        description=f"Table generated via Fivetran sync: {schema_name}.{table_name}",
        metadata=metadata,
    )
Code Example #7
File: many_events.py  Project: sarahmk125/dagster
def many_table_materializations(_context):
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()
        for table in raw_tables:
            yield AssetMaterialization(
                asset_key="table_info",
                metadata={
                    "table_name": table,
                    "table_path": EventMetadata.path(f"/path/to/{table}"),
                    "table_data": {"name": table},
                    "table_name_big": EventMetadata.url(f"https://bigty.pe/{table}"),
                    "table_blurb": EventMetadata.md(md_str),
                    "big_int": 29119888133298982934829348,
                    "float_nan": float("nan"),
                },
            )
Code Example #8
File: many_events.py  Project: sarahmk125/dagster
def many_materializations_and_passing_expectations(_context):
    tables = [
        "users",
        "groups",
        "events",
        "friends",
        "pages",
        "fans",
        "event_admins",
        "group_admins",
    ]

    for table in tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata={
                "table_path": EventMetadata.path(f"/path/to/{table}.raw"),
            },
        )
        yield ExpectationResult(
            success=True,
            label="{table}.row_count".format(table=table),
            description="Row count passed for {table}".format(table=table),
        )
Code Example #9
def sort_by_calories(context, cereals):
    sorted_cereals = sorted(cereals,
                            key=lambda cereal: int(cereal["calories"]))
    least_caloric = sorted_cereals[0]["name"]
    most_caloric = sorted_cereals[-1]["name"]
    context.log.info(f"Least caloric cereal: {least_caloric}")
    context.log.info(f"Most caloric cereal: {most_caloric}")
    fieldnames = list(sorted_cereals[0].keys())
    sorted_cereals_csv_path = os.path.abspath(
        f"output/calories_sorted_{context.run_id}.csv")
    os.makedirs(os.path.dirname(sorted_cereals_csv_path), exist_ok=True)
    with open(sorted_cereals_csv_path, "w") as fd:
        writer = csv.DictWriter(fd, fieldnames)
        writer.writeheader()
        writer.writerows(sorted_cereals)
    yield AssetMaterialization(
        asset_key="sorted_cereals_csv",
        description="Cereals data frame sorted by caloric content",
        metadata={
            "sorted_cereals_csv_path": EventMetadata.path(sorted_cereals_csv_path)
        },
    )
    yield Output(None)
Code Example #10
    def made_solid(context):
        partition_date = datetime.strptime(context.solid_config["partition"],
                                           DEFAULT_DATE_FORMAT)
        if data_size_fn:
            data_size = data_size_fn(partition_date)
            sleep_time = sleep_factor * data_size

            time.sleep(sleep_time)

        rand = random()
        if error_rate and rand < error_rate:
            raise IntentionalRandomFailure(
                f"random {rand} < error rate {error_rate}")

        if asset_key:
            metadata = {"Data size (bytes)": data_size} if data_size_fn else None

            yield AssetMaterialization(
                asset_key=asset_key,
                metadata=metadata,
                partition=context.solid_config.get("partition"),
            )
Code Example #11
File: utils.py  Project: helloworld/dagster
def _materialization_for_stream(
    name: str,
    stream_info: Dict[str, Any],
    stream_stats: Dict[str, Any],
    asset_key_prefix: List[str],
) -> AssetMaterialization:

    return AssetMaterialization(
        asset_key=asset_key_prefix + [name],
        metadata={
            "schema": MetadataValue.table_schema(
                TableSchema(
                    columns=[
                        TableColumn(name=name, type=str(info["type"]))
                        for name, info in stream_info["stream"]["jsonSchema"]["properties"].items()
                    ]
                )
            ),
            "columns": ",".join(
                name for name in stream_info["stream"]["jsonSchema"]["properties"].keys()
            ),
            **{k: v for k, v in stream_stats.items() if v is not None},
        },
    )
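The `MetadataValue.table_schema` pattern above also works outside this Airbyte helper. A standalone sketch, assuming a Dagster version that exports `TableSchema` and `TableColumn` from the top-level package; the asset key and columns are made up for illustration:

from dagster import AssetMaterialization, MetadataValue, TableColumn, TableSchema


def users_table_materialization() -> AssetMaterialization:
    # Attach a structured schema so the materialization renders as a
    # table description in the UI (hypothetical table and columns).
    return AssetMaterialization(
        asset_key=["warehouse", "users"],
        metadata={
            "schema": MetadataValue.table_schema(
                TableSchema(
                    columns=[
                        TableColumn(name="id", type="integer"),
                        TableColumn(name="email", type="string"),
                    ]
                )
            ),
        },
    )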
Code Example #12
File: longitudinal.py  Project: zuik/dagster
    def made_solid(context):
        partition_date = datetime.strptime(context.solid_config["partition"], DEFAULT_DATE_FORMAT)
        if data_size_fn:
            data_size = data_size_fn(partition_date)
            sleep_time = sleep_factor * data_size

            time.sleep(sleep_time)

        if error_rate and random() < error_rate:
            raise Exception("blah")

        if asset_key:
            metadata_entries = materialization_metadata_entries or []
            if data_size_fn:
                metadata_entries.append(EventMetadataEntry.float(data_size, "Data size (bytes)"))

            if len(metadata_entries) == 0:
                metadata_entries = None

            yield AssetMaterialization(
                asset_key=asset_key,
                metadata_entries=metadata_entries,
                partition=context.solid_config.get("partition"),
            )
Code Example #13
File: log_file.py  Project: zuik/dagster
def read_file(context):
    relative_filename = context.solid_config["filename"]
    directory = context.solid_config["directory"]
    filename = os.path.join(directory, relative_filename)
    try:
        fstats = os.stat(filename)
        context.log.info("Found file {}".format(relative_filename))
        yield AssetMaterialization(
            asset_key=AssetKey(["log_file", relative_filename]),
            metadata_entries=[
                EventMetadataEntry.fspath(filename),
                EventMetadataEntry.json(
                    {
                        "size": fstats.st_size,
                        "ctime": fstats.st_ctime,
                        "mtime": fstats.st_mtime,
                    },
                    "File stats",
                ),
            ],
        )
        yield Output(relative_filename)
    except FileNotFoundError:
        context.log.error("No file found: {}".format(relative_filename))
Code Example #14
File: solids.py  Project: markjm610/dagster
def dbt_cli_snapshot_freshness(context) -> DbtCliResult:
    """This solid executes ``dbt source snapshot-freshness`` via the dbt CLI."""
    logs, raw_output, return_code = execute_dbt(
        context.solid_config["dbt_executable"],
        command=("source", "snapshot-freshness"),
        flags_dict=passthrough_flags_only(context.solid_config,
                                          ("select", "output", "threads")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_cli_snapshot_freshness-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt source snapshot-freshness`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                description="The return code of a shell execution of `dbt source snapshot-freshness`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description="The raw output of a shell execution of `dbt source snapshot-freshness`.",
            ),
        ],
    )

    yield Output(
        DbtCliResult(logs=logs, raw_output=raw_output,
                     return_code=return_code))
Code Example #15
File: setup.py  Project: helloworld/dagster
def asset_yields_observation():
    yield AssetObservation(asset_key=AssetKey("asset_yields_observation"),
                           metadata={"text": "FOO"})
    yield AssetMaterialization(asset_key=AssetKey("asset_yields_observation"))
    yield Output(5)
Code Example #16
def test_access_partition_keys_from_context_non_identity_partition_mapping():
    upstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])
    downstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps each downstream partition to two partitions in the upstream asset: itself and the
        preceding partition.
        """
        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            assert downstream_partitions_def
            assert upstream_partitions_def

            start, end = downstream_partition_key_range
            return PartitionKeyRange(str(max(1, int(start) - 1)), end)

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "2"

        def load_input(self, context):
            start, end = context.asset_partition_key_range
            assert (start, end) == ("1", "2")

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "2"

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={
            "upstream_asset": TrailingWindowPartitionMapping()
        },
    )
    def downstream_asset(context, upstream_asset):
        assert context.output_asset_partition_key() == "2"
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="2")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(AssetKey(["upstream_asset"]), partition="2")
    ]
    assert result.asset_materializations_for_node("downstream_asset") == [
        AssetMaterialization(AssetKey(["downstream_asset"]), partition="2")
    ]
Code Example #17
File: setup.py  Project: helloworld/dagster
def yield_partition_materialization():
    yield AssetMaterialization(
        asset_key=AssetKey("yield_partition_materialization"), partition="c")
    yield Output(5)
Code Example #18
File: test_type_guide.py  Project: plawler92/dagster
def save_to_file_materialization(_, cfg, value):
    path = cfg["path"]
    with open(path, "w") as ff:
        ff.write(str(value))
        return AssetMaterialization(
            "path", "Wrote out value to {path}".format(path=path), metadata={"path": path}
        )
Code Example #19
File: test_manager.py  Project: prezi/dagster
def test_out_of_pipeline_yield_event():
    manager = Manager()
    assert manager.yield_event(
        AssetMaterialization("foo")) == AssetMaterialization("foo")
Code Example #20
def solid_two(_):
    yield AssetMaterialization(asset_key=AssetKey("asset_2"))
    yield AssetMaterialization(asset_key=AssetKey(["path", "to", "asset_3"]))
    yield Output(1)
Code Example #21
def solid_normalization(_):
    yield AssetMaterialization(asset_key="path/to-asset_4")
    yield Output(1)
Code Example #22
def solid_one(_):
    yield AssetMaterialization(asset_key=AssetKey("asset_1"))
    yield Output(1)
Code Example #23
def materialize_two(_):
    yield AssetMaterialization(asset_key=asset_key_two)
    yield Output(1)
Code Example #24
def materialize_one(_):
    yield AssetMaterialization(asset_key=asset_key_one)
    yield Output(1)
Code Example #25
File: setup.py  Project: helloworld/dagster
def solid_asset_a(_):
    yield AssetMaterialization(asset_key="a")
    yield Output(1)
Code Example #26
File: setup.py  Project: helloworld/dagster
def solid_partitioned_asset(_):
    yield AssetMaterialization(asset_key="a", partition="partition_1")
    yield Output(1)
Code Example #27
File: setup.py  Project: helloworld/dagster
def solid_asset_b(_, num):
    yield AssetMaterialization(asset_key="b")
    time.sleep(0.1)
    yield AssetMaterialization(asset_key="c")
    yield Output(num)
Code Example #28
        def handle_output(self, context, obj):
            keys = tuple(context.get_run_scoped_output_identifier())
            self.values[keys] = obj

            yield AssetMaterialization(asset_key="yield_one")
            yield AssetMaterialization(asset_key="yield_two")
Code Example #29
File: setup.py  Project: helloworld/dagster
def tag_asset_solid(_):
    yield AssetMaterialization(asset_key="a", tags={"foo": "FOO"})
    yield Output(1)
Code Example #30
def _stats_records(run_id):
    now = time.time()
    return [
        _event_record(run_id, "A", now - 325, DagsterEventType.STEP_START),
        _event_record(
            run_id,
            "A",
            now - 225,
            DagsterEventType.STEP_SUCCESS,
            StepSuccessData(duration_ms=100000.0),
        ),
        _event_record(run_id, "B", now - 225, DagsterEventType.STEP_START),
        _event_record(
            run_id,
            "B",
            now - 175,
            DagsterEventType.STEP_FAILURE,
            StepFailureData(error=None, user_failure_data=None),
        ),
        _event_record(run_id, "C", now - 175, DagsterEventType.STEP_START),
        _event_record(run_id, "C", now - 150, DagsterEventType.STEP_SKIPPED),
        _event_record(run_id, "D", now - 150, DagsterEventType.STEP_START),
        _event_record(
            run_id,
            "D",
            now - 125,
            DagsterEventType.ASSET_MATERIALIZATION,
            StepMaterializationData(AssetMaterialization(asset_key="mat_1")),
        ),
        _event_record(
            run_id,
            "D",
            now - 100,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(
                ExpectationResult(success=True, label="exp 1")),
        ),
        _event_record(
            run_id,
            "D",
            now - 75,
            DagsterEventType.ASSET_MATERIALIZATION,
            StepMaterializationData(AssetMaterialization(asset_key="mat_2")),
        ),
        _event_record(
            run_id,
            "D",
            now - 50,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(
                ExpectationResult(success=False, label="exp 2")),
        ),
        _event_record(
            run_id,
            "D",
            now - 25,
            DagsterEventType.ASSET_MATERIALIZATION,
            StepMaterializationData(AssetMaterialization(asset_key="mat_3")),
        ),
        _event_record(
            run_id,
            "D",
            now,
            DagsterEventType.STEP_SUCCESS,
            StepSuccessData(duration_ms=150000.0),
        ),
    ]