def fail_solid(_):
    # Emits a single ``None`` output whose only metadata entry is scoped to
    # partition "3".
    # NOTE(review): the name suggests running this is expected to fail
    # (presumably because "3" is not a declared partition of the output) --
    # confirm against the code that wraps and executes this function.
    partition_entry = PartitionMetadataEntry("3", MetadataEntry.int(1, "x"))
    yield Output(None, metadata_entries=[partition_entry])
def test_io_manager_single_partition_materialization():
    """Asset keys and metadata supplied by an IO manager yield materializations.

    A two-solid pipeline runs with a custom IO manager that derives the asset
    key from the step key and yields one metadata entry from
    ``handle_output``. The test expects exactly two ASSET_MATERIALIZATION
    events and checks each one with ``check_materialization``: the first for
    ``solid1`` with the IO manager's entry, the second for ``solid2`` with the
    IO manager's entry plus the entry attached to the output, and ``solid1``
    as its parent asset.
    """
    nrows_entry = MetadataEntry.int(123, "nrows")
    value_entry = MetadataEntry.float(3.21, "some value")

    class MyIOManager(IOManager):
        def get_output_asset_key(self, context):
            # One asset per step: the key path is just the step key.
            return AssetKey([context.step_key])

        def handle_output(self, context, obj):
            # store asset
            yield nrows_entry

        def load_input(self, context):
            return None

    @io_manager
    def my_io_manager(_):
        return MyIOManager()

    # Solid and input names are significant: the asset keys checked below are
    # built from the step keys.
    @solid(output_defs=[OutputDefinition(name="output1")])
    def solid1(_):
        return Output(None, "output1")

    @solid(output_defs=[OutputDefinition(name="output2")])
    def solid2(_, _input1):
        yield Output(7, "output2", metadata_entries=[value_entry])

    io_manager_mode = ModeDefinition(resource_defs={"io_manager": my_io_manager})

    @pipeline(mode_defs=[io_manager_mode])
    def my_pipeline():
        solid2(solid1())

    result = execute_pipeline(my_pipeline)

    asset_events = []
    for event in result.step_event_list:
        if event.event_type_value == "ASSET_MATERIALIZATION":
            asset_events.append(event)

    assert len(asset_events) == 2
    upstream_event, downstream_event = asset_events

    check_materialization(
        upstream_event,
        AssetKey(["solid1"]),
        metadata_entries=[nrows_entry],
    )
    check_materialization(
        downstream_event,
        AssetKey(["solid2"]),
        metadata_entries=[nrows_entry, value_entry],
        parent_assets=[AssetLineageInfo(AssetKey(["solid1"]))],
    )
def test_dynamic_output_definition_single_partition_materialization():
    """Dynamic outputs with a callable asset key each get a materialization.

    ``solid1`` writes to the static asset ``table1``; ``solid2`` yields four
    dynamic outputs whose asset key is computed from the mapping key. The
    test expects five ASSET_MATERIALIZATION events: the first for ``table1``,
    then four with distinct asset key paths, each checked with
    ``check_materialization`` to have ``table1`` as parent asset.
    """
    nrows_entry = MetadataEntry.int(123, "nrows")
    value_entry = MetadataEntry.float(3.21, "some value")

    static_output = OutputDefinition(name="output1", asset_key=AssetKey("table1"))
    dynamic_output = DynamicOutputDefinition(
        name="output2",
        asset_key=lambda context: AssetKey(context.mapping_key),
    )

    @solid(output_defs=[static_output])
    def solid1(_):
        return Output(None, "output1", metadata_entries=[nrows_entry])

    @solid(output_defs=[dynamic_output])
    def solid2(_, _input1):
        # Mapping keys "0".."3" -- one dynamic output per key.
        for key in map(str, range(4)):
            yield DynamicOutput(
                7,
                mapping_key=key,
                output_name="output2",
                metadata_entries=[value_entry],
            )

    @solid
    def do_nothing(_, _input1):
        # Downstream sink so the dynamic outputs can be mapped over.
        return None

    @pipeline
    def my_pipeline():
        solid2(solid1()).map(do_nothing)

    result = execute_pipeline(my_pipeline)

    asset_events = []
    for event in result.step_event_list:
        if event.event_type_value == "ASSET_MATERIALIZATION":
            asset_events.append(event)

    assert len(asset_events) == 5

    check_materialization(
        asset_events[0],
        AssetKey(["table1"]),
        metadata_entries=[nrows_entry],
    )

    table1_lineage_key = ["table1"]
    seen_paths = set()
    # The remaining four events are the dynamic outputs.
    for dynamic_event in asset_events[1:5]:
        path = dynamic_event.asset_key.path
        seen_paths.add(tuple(path))
        check_materialization(
            dynamic_event,
            AssetKey(path),
            metadata_entries=[value_entry],
            parent_assets=[AssetLineageInfo(AssetKey(table1_lineage_key))],
        )

    # Every dynamic output must have landed on its own asset key.
    assert len(seen_paths) == 4
def test_output_definition_single_partition_materialization():
    """Asset keys declared on output definitions produce materializations.

    ``solid1`` declares a fixed ``AssetKey`` for its output and ``solid2``
    declares a callable that returns one. The test expects exactly two
    ASSET_MATERIALIZATION events and checks them with
    ``check_materialization``: ``table1`` with the first entry, then
    ``table2`` with the second entry and ``table1`` as its parent asset.
    """
    nrows_entry = MetadataEntry.int(123, "nrows")
    value_entry = MetadataEntry.float(3.21, "some value")

    def table2_key(_):
        # Callable form of the asset key; the argument is ignored.
        return AssetKey("table2")

    upstream_output = OutputDefinition(name="output1", asset_key=AssetKey("table1"))
    downstream_output = OutputDefinition(name="output2", asset_key=table2_key)

    @solid(output_defs=[upstream_output])
    def solid1(_):
        return Output(None, "output1", metadata_entries=[nrows_entry])

    @solid(output_defs=[downstream_output])
    def solid2(_, _input1):
        yield Output(7, "output2", metadata_entries=[value_entry])

    @pipeline
    def my_pipeline():
        solid2(solid1())

    result = execute_pipeline(my_pipeline)

    asset_events = []
    for event in result.step_event_list:
        if event.event_type_value == "ASSET_MATERIALIZATION":
            asset_events.append(event)

    assert len(asset_events) == 2
    upstream_event, downstream_event = asset_events

    check_materialization(
        upstream_event,
        AssetKey(["table1"]),
        metadata_entries=[nrows_entry],
    )
    check_materialization(
        downstream_event,
        AssetKey(["table2"]),
        metadata_entries=[value_entry],
        parent_assets=[AssetLineageInfo(AssetKey(["table1"]))],
    )
def test_output_definition_multiple_partition_materialization():
    """A partitioned output produces one materialization per partition.

    ``solid1`` declares asset ``table1`` with partitions "0", "1" and "2" and
    emits one general metadata entry plus one partition-specific entry per
    partition. ``solid2`` writes the unpartitioned asset ``table2``.

    The test expects four ASSET_MATERIALIZATION events: the first three for
    ``table1`` (one per distinct partition, each checked to carry the general
    entry and that partition's own entry), and the last for ``table2`` whose
    parent asset is built with ``n_asset_keys("table1", 3)``.
    """
    entry1 = MetadataEntry.int(123, "nrows")
    entry2 = MetadataEntry.float(3.21, "some value")

    # One distinct entry per partition, indexed by the partition's number.
    partition_entries = [
        MetadataEntry.int(123 * i * i, "partition count") for i in range(3)
    ]

    @solid(
        output_defs=[
            OutputDefinition(
                name="output1",
                asset_key=AssetKey("table1"),
                # Set literal instead of set([...]).
                asset_partitions={"0", "1", "2"},
            )
        ]
    )
    def solid1(_):
        return Output(
            None,
            "output1",
            metadata_entries=[
                entry1,
                *[
                    PartitionMetadataEntry(str(i), entry)
                    for i, entry in enumerate(partition_entries)
                ],
            ],
        )

    @solid(output_defs=[OutputDefinition(name="output2", asset_key=AssetKey("table2"))])
    def solid2(_, _input1):
        yield Output(
            7,
            "output2",
            metadata_entries=[entry2],
        )

    @pipeline
    def my_pipeline():
        solid2(solid1())

    result = execute_pipeline(my_pipeline)
    events = result.step_event_list
    materializations = [
        event for event in events if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 4

    seen_partitions = set()
    # The first three events belong to the partitioned asset; their order
    # across partitions is not assumed, so look the entry up by partition.
    for i in range(3):
        partition = materializations[i].partition
        seen_partitions.add(partition)
        check_materialization(
            materializations[i],
            AssetKey(["table1"]),
            metadata_entries=[entry1, partition_entries[int(partition)]],
        )

    # Each partition must have been materialized exactly once.
    assert len(seen_partitions) == 3

    check_materialization(
        materializations[-1],
        AssetKey(["table2"]),
        metadata_entries=[entry2],
        parent_assets=[n_asset_keys("table1", 3)],
    )