def logs_events(context):
    # Logs four events through ``context.log_event`` (an AssetMaterialization,
    # a Materialization, an ExpectationResult and an AssetObservation), then
    # yields a fifth AssetMaterialization followed by the "blah" Output.
    # Each event is constructed immediately before it is logged.
    first_materialization = AssetMaterialization("first")
    context.log_event(first_materialization)

    second_materialization = Materialization("second")
    context.log_event(second_materialization)

    expectation = ExpectationResult(success=True)
    context.log_event(expectation)

    fourth_observation = AssetObservation("fourth")
    context.log_event(fourth_observation)

    fifth_materialization = AssetMaterialization("fifth")
    yield fifth_materialization

    final_output = Output("blah")
    yield final_output
def partitioned_dataset_op(context):
    # Reads the data frame for the date found under "date" in the op config,
    # logs an observation of "my_partitioned_dataset" tagged with that date as
    # its partition, and returns the data frame.
    date_key = context.op_config["date"]
    frame = read_df_for_date(date_key)

    observation = AssetObservation(
        asset_key="my_partitioned_dataset",
        partition=date_key,
    )
    context.log_event(observation)

    return frame
def the_solid(_context):
    # Yields a single observation of asset "foo" carrying text, int, url,
    # float and python-artifact metadata values. The context is unused.
    observed_metadata = {
        "text": "FOO",
        "int": 22,
        "url": EventMetadata.url("http://fake.com"),
        "float": 0.1,
        "python": EventMetadata.python_artifact(EventMetadata),
    }
    yield AssetObservation(asset_key="foo", metadata=observed_metadata)
def test_asset_observation():
    """Check that an observation yielded by an op is reported for that node.

    Executes a one-op job in process and compares the observations returned
    by ``asset_observations_for_node("my_op")`` with the single expected
    ``AssetObservation`` keyed ``["abc"]``.
    """

    @op(out={})
    def my_op():
        # The op declares no outputs; it only emits an observation.
        yield AssetObservation("abc")

    @job
    def my_job():
        my_op()

    result = my_job.execute_in_process()

    observed = result.asset_observations_for_node("my_op")
    expected = [AssetObservation(asset_key=AssetKey(["abc"]))]
    assert observed == expected
def observes_dataset_op(context):
    # Reads a data frame, persists it, then logs an observation of
    # "my_dataset" (with text, path, url and size metadata) followed by a
    # materialization of the same asset. Returns the storage path.
    frame = read_df()
    storage_path = persist_to_storage(frame)

    dataset_metadata = {
        "text_metadata": "Text-based metadata for this event",
        "path": EventMetadata.path(storage_path),
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
        "size (bytes)": calculate_bytes(frame),
    }
    observation = AssetObservation(asset_key="my_dataset", metadata=dataset_metadata)
    context.log_event(observation)

    materialization = AssetMaterialization(asset_key="my_dataset")
    context.log_event(materialization)

    return storage_path
def test_io_manager_single_partition_add_input_metadata():
    """Check the observation produced by input metadata on a partitioned run.

    Builds a two-asset job whose IO manager calls ``add_input_metadata`` in
    ``load_input``, executes it for partition "a", and asserts that the first
    ASSET_OBSERVATION event has step key "asset_2" and equals an observation
    of "asset_1" with the same metadata, description and partition.
    """
    static_partitions = StaticPartitionsDefinition(["a", "b", "c"])

    @asset(partitions_def=static_partitions)
    def asset_1():
        return 1

    @asset(partitions_def=static_partitions)
    def asset_2(asset_1):
        return asset_1 + 1

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            # Outputs are intentionally discarded.
            pass

        def load_input(self, context):
            # Attach metadata to the input being loaded, then return a
            # constant in place of a stored value.
            context.add_input_metadata(
                metadata={"foo": "bar"},
                description="hello world",
            )
            return 1

    @io_manager
    def my_io_manager(_):
        return MyIOManager()

    assets_job = build_assets_job(
        "assets_job",
        [asset_1, asset_2],
        resource_defs={"io_manager": my_io_manager},
    )

    result = assets_job.execute_in_process(partition_key="a")

    def get_observation(event):
        # Pull the AssetObservation payload out of an event.
        return event.event_specific_data.asset_observation

    observation_events = [
        node_event
        for node_event in result.all_node_events
        if node_event.event_type_value == "ASSET_OBSERVATION"
    ]

    first_event = observation_events[0]
    assert first_event.step_key == "asset_2"

    expected_observation = AssetObservation(
        asset_key="asset_1",
        metadata={"foo": "bar"},
        description="hello world",
        partition="a",
    )
    assert get_observation(observation_events[0]) == expected_observation
def asset_yields_observation():
    # Yields, in order: an observation (with text metadata) and a
    # materialization of the asset "asset_yields_observation", then the
    # output value 5.
    asset_name = "asset_yields_observation"

    yield AssetObservation(
        asset_key=AssetKey(asset_name),
        metadata={"text": "FOO"},
    )
    yield AssetMaterialization(asset_key=AssetKey(asset_name))
    yield Output(5)
def basic_op(context):
    # Logs five events through ``context.log_event``: AssetMaterialization
    # "first", Materialization "second", AssetMaterialization "third", a
    # successful ExpectationResult, and AssetObservation "fourth".
    # Each event is constructed immediately before it is logged.
    first_materialization = AssetMaterialization("first")
    context.log_event(first_materialization)

    second_materialization = Materialization("second")
    context.log_event(second_materialization)

    third_materialization = AssetMaterialization("third")
    context.log_event(third_materialization)

    expectation = ExpectationResult(success=True)
    context.log_event(expectation)

    fourth_observation = AssetObservation("fourth")
    context.log_event(fourth_observation)
def observation_op(context):
    # Reads a data frame, logs an observation of "observation_asset" whose
    # "num_rows" metadata is the frame's length, and returns 5.
    frame = read_df()

    row_count_metadata = {"num_rows": len(frame)}
    observation = AssetObservation(
        asset_key="observation_asset",
        metadata=row_count_metadata,
    )
    context.log_event(observation)

    return 5
def test_io_manager_add_input_metadata():
    """Check that input metadata added by an IO manager surfaces in events.

    The IO manager's ``load_input`` adds two metadata dicts and verifies the
    observations visible on the context. After running the job, the test
    asserts that two ASSET_OBSERVATION events were emitted for step "after"
    and that the LOADED_INPUT event carries both metadata entries.
    """

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            # Outputs are intentionally discarded.
            pass

        def load_input(self, context):
            for input_metadata in ({"foo": "bar"}, {"baz": "qux"}):
                context.add_input_metadata(metadata=input_metadata)

            recorded = context.get_observations()
            assert recorded[0].asset_key == context.asset_key
            assert recorded[0].metadata_entries[0].label == "foo"
            assert recorded[1].metadata_entries[0].label == "baz"
            return 1

    @io_manager
    def my_io_manager(_):
        return MyIOManager()

    in_asset_key = AssetKey(["a", "b"])
    out_asset_key = AssetKey(["c", "d"])

    @op(out=Out(asset_key=out_asset_key))
    def before():
        pass

    @op(ins={"a": In(asset_key=in_asset_key)}, out={})
    def after(a):
        del a

    @job(resource_defs={"io_manager": my_io_manager})
    def my_job():
        after(before())

    def get_observation(event):
        # Pull the AssetObservation payload out of an event.
        return event.event_specific_data.asset_observation

    result = my_job.execute_in_process()

    observation_events = [
        node_event
        for node_event in result.all_node_events
        if node_event.event_type_value == "ASSET_OBSERVATION"
    ]

    # The first and second observations, in order, both come from step
    # "after" and describe the input asset with one metadata dict each.
    expected_metadata_in_order = ({"foo": "bar"}, {"baz": "qux"})
    for position, expected_metadata in enumerate(expected_metadata_in_order):
        assert observation_events[position].step_key == "after"
        assert get_observation(observation_events[position]) == AssetObservation(
            asset_key=in_asset_key, metadata=expected_metadata
        )

    # Confirm the LOADED_INPUT event contains the metadata as well.
    loaded_input_events = [
        run_event
        for run_event in result.all_events
        if run_event.event_type_value == "LOADED_INPUT"
    ]
    loaded_input_event = loaded_input_events[0]
    assert loaded_input_event

    loaded_entries = loaded_input_event.event_specific_data.metadata_entries
    assert len(loaded_entries) == 2
    entry_labels = [entry.label for entry in loaded_entries]
    assert entry_labels == ["foo", "baz"]
def my_op():
    # Yields exactly one observation, constructed from the key "abc".
    observation = AssetObservation("abc")
    yield observation