Example #1
0
 def logs_events(context):
     """Log four events through ``context.log_event``, then yield two more.

     Logged, in order: ``AssetMaterialization("first")``,
     ``Materialization("second")``, a successful ``ExpectationResult`` and
     ``AssetObservation("fourth")``. The generator then yields
     ``AssetMaterialization("fifth")`` followed by ``Output("blah")``.
     """
     log = context.log_event

     # Each event is built immediately before it is logged so that
     # construction and logging stay interleaved.
     first = AssetMaterialization("first")
     log(first)
     second = Materialization("second")
     log(second)
     expectation = ExpectationResult(success=True)
     log(expectation)
     fourth = AssetObservation("fourth")
     log(fourth)

     fifth = AssetMaterialization("fifth")
     yield fifth
     final_output = Output("blah")
     yield final_output
Example #2
0
def partitioned_dataset_op(context):
    """Read the data for the configured date and log a partitioned observation.

    The date is taken from ``context.op_config["date"]`` and used both as the
    argument to ``read_df_for_date`` and as the ``partition`` of the logged
    ``AssetObservation`` for asset key ``"my_partitioned_dataset"``.

    Returns:
        Whatever ``read_df_for_date`` returned for that date.
    """
    date_key = context.op_config["date"]
    frame = read_df_for_date(date_key)

    observation = AssetObservation(
        asset_key="my_partitioned_dataset",
        partition=date_key,
    )
    context.log_event(observation)
    return frame
Example #3
0
 def the_solid(_context):
     """Yield one ``AssetObservation`` for asset key ``"foo"`` with mixed metadata.

     The metadata combines plain values (text, int, float) with values wrapped
     by ``EventMetadata.url`` and ``EventMetadata.python_artifact``. The
     ``_context`` argument is not used.
     """
     observation_metadata = {
         "text": "FOO",
         "int": 22,
         "url": EventMetadata.url("http://fake.com"),
         "float": 0.1,
         "python": EventMetadata.python_artifact(EventMetadata),
     }
     yield AssetObservation(asset_key="foo", metadata=observation_metadata)
Example #4
0
def test_asset_observation():
    """Check that an observation yielded by an op is reported for that node."""

    # Op with no outputs that only yields an observation for key "abc".
    @op(out={})
    def my_op():
        yield AssetObservation("abc")

    @job
    def my_job():
        my_op()

    result = my_job.execute_in_process()

    observed = result.asset_observations_for_node("my_op")
    expected = [AssetObservation(asset_key=AssetKey(["abc"]))]
    assert observed == expected
Example #5
0
def observes_dataset_op(context):
    """Persist a dataframe, then log an observation and a materialization.

    The dataframe comes from ``read_df`` and is handed to
    ``persist_to_storage``. An ``AssetObservation`` for asset key
    ``"my_dataset"`` is logged with text, path, URL and size metadata,
    followed by an ``AssetMaterialization`` for the same key.

    Returns:
        The value returned by ``persist_to_storage``.
    """
    frame = read_df()
    storage_path = persist_to_storage(frame)

    observation_metadata = {
        "text_metadata": "Text-based metadata for this event",
        "path": EventMetadata.path(storage_path),
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
        "size (bytes)": calculate_bytes(frame),
    }
    observation = AssetObservation(
        asset_key="my_dataset",
        metadata=observation_metadata,
    )
    context.log_event(observation)

    materialization = AssetMaterialization(asset_key="my_dataset")
    context.log_event(materialization)
    return storage_path
Example #6
0
def test_io_manager_single_partition_add_input_metadata():
    """Check the observation produced by ``add_input_metadata`` for one partition.

    Two statically partitioned assets are run for partition ``"a"`` with an IO
    manager whose ``load_input`` attaches metadata and a description. The first
    ASSET_OBSERVATION event is expected on step ``asset_2`` and to describe
    ``asset_1`` with that metadata, description and partition.
    """
    static_partitions = StaticPartitionsDefinition(["a", "b", "c"])

    @asset(partitions_def=static_partitions)
    def asset_1():
        return 1

    @asset(partitions_def=static_partitions)
    def asset_2(asset_1):
        return asset_1 + 1

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            # Attaching input metadata is what the assertions below observe.
            context.add_input_metadata(metadata={"foo": "bar"}, description="hello world")
            return 1

    @io_manager
    def my_io_manager(_):
        return MyIOManager()

    assets_job = build_assets_job(
        "assets_job",
        [asset_1, asset_2],
        resource_defs={"io_manager": my_io_manager},
    )
    result = assets_job.execute_in_process(partition_key="a")

    observation_events = [
        evt for evt in result.all_node_events if evt.event_type_value == "ASSET_OBSERVATION"
    ]
    first_event = observation_events[0]

    assert first_event.step_key == "asset_2"
    assert first_event.event_specific_data.asset_observation == AssetObservation(
        asset_key="asset_1",
        metadata={"foo": "bar"},
        description="hello world",
        partition="a",
    )
Example #7
0
def asset_yields_observation():
    """Yield an observation, a materialization and an output, in that order.

    Both asset events use the key ``"asset_yields_observation"``; the
    observation carries ``{"text": "FOO"}`` metadata and the output value is 5.
    """
    key_name = "asset_yields_observation"

    observation = AssetObservation(
        asset_key=AssetKey(key_name),
        metadata={"text": "FOO"},
    )
    yield observation

    materialization = AssetMaterialization(asset_key=AssetKey(key_name))
    yield materialization

    yield Output(5)
Example #8
0
 def basic_op(context):
     """Log five events through ``context.log_event`` and return nothing.

     Logged, in order: ``AssetMaterialization("first")``,
     ``Materialization("second")``, ``AssetMaterialization("third")``, a
     successful ``ExpectationResult`` and ``AssetObservation("fourth")``.
     """
     log = context.log_event

     # Each event is built immediately before it is logged so that
     # construction and logging stay interleaved.
     first = AssetMaterialization("first")
     log(first)
     second = Materialization("second")
     log(second)
     third = AssetMaterialization("third")
     log(third)
     expectation = ExpectationResult(success=True)
     log(expectation)
     fourth = AssetObservation("fourth")
     log(fourth)
Example #9
0
def observation_op(context):
    """Log an observation recording the row count of ``read_df()`` and return 5.

    The ``AssetObservation`` targets asset key ``"observation_asset"`` and its
    ``num_rows`` metadata is ``len`` of the value returned by ``read_df``.
    """
    frame = read_df()

    row_metadata = {"num_rows": len(frame)}
    observation = AssetObservation(
        asset_key="observation_asset",
        metadata=row_metadata,
    )
    context.log_event(observation)
    return 5
Example #10
0
def test_io_manager_add_input_metadata():
    """Check that ``add_input_metadata`` produces observations and input metadata.

    An IO manager adds two metadata dicts while loading an input. The test
    verifies that the input context exposes them via ``get_observations``,
    that two ASSET_OBSERVATION events are emitted on step ``after`` for the
    input asset key, and that the first LOADED_INPUT event carries both
    metadata entries.
    """

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            context.add_input_metadata(metadata={"foo": "bar"})
            context.add_input_metadata(metadata={"baz": "qux"})

            # Both additions must already be visible on the input context.
            recorded = context.get_observations()
            assert recorded[0].asset_key == context.asset_key
            assert recorded[0].metadata_entries[0].label == "foo"
            assert recorded[1].metadata_entries[0].label == "baz"
            return 1

    @io_manager
    def my_io_manager(_):
        return MyIOManager()

    in_asset_key = AssetKey(["a", "b"])
    out_asset_key = AssetKey(["c", "d"])

    @op(out=Out(asset_key=out_asset_key))
    def before():
        pass

    @op(ins={"a": In(asset_key=in_asset_key)}, out={})
    def after(a):
        del a

    @job(resource_defs={"io_manager": my_io_manager})
    def my_job():
        after(before())

    result = my_job.execute_in_process()
    observation_events = [
        evt for evt in result.all_node_events if evt.event_type_value == "ASSET_OBSERVATION"
    ]

    # First and second observation, checked in emission order. Indexing (not
    # zip) keeps the failure loud when fewer events were emitted.
    expected_metadata = [{"foo": "bar"}, {"baz": "qux"}]
    for position, metadata in enumerate(expected_metadata):
        event = observation_events[position]
        assert event.step_key == "after"
        assert event.event_specific_data.asset_observation == AssetObservation(
            asset_key=in_asset_key, metadata=metadata
        )

    # Confirm the loaded_input event contains the metadata.
    loaded_input_events = [
        evt for evt in result.all_events if evt.event_type_value == "LOADED_INPUT"
    ]
    first_loaded_input = loaded_input_events[0]
    assert first_loaded_input
    entries = first_loaded_input.event_specific_data.metadata_entries
    assert len(entries) == 2
    assert [entry.label for entry in entries] == ["foo", "baz"]
Example #11
0
 def my_op():
     """Yield a single ``AssetObservation`` constructed from ``"abc"``."""
     observation = AssetObservation("abc")
     yield observation