def test_default_asset_store_reexecution():
    """Re-executing a step subset loads upstream outputs through the asset store.

    Only ``solid_b`` is re-run, so its input must come from a GET_ASSET
    operation against the first run's persisted ``solid_a`` output rather
    than from an in-process intermediate.
    """
    # Use stdlib tempfile for consistency with the other tests in this file
    # (the original used the py2/py3 compat shim `seven.TemporaryDirectory`).
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_asset_store = fs_asset_store.configured({"base_dir": tmpdir_path})
        pipeline_def = define_asset_pipeline(default_asset_store, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        get_asset_events = [
            evt
            for evt in re_result.event_list
            if evt.is_asset_store_operation
            and AssetStoreOperationType(evt.event_specific_data.op)
            == AssetStoreOperationType.GET_ASSET
        ]
        assert len(get_asset_events) == 1
        # The loaded asset originates from the upstream "solid_a" step.
        assert get_asset_events[0].event_specific_data.step_key == "solid_a"
def test_result_output():
    """`output_value()` still exposes solid outputs when an asset store is configured."""
    # Use stdlib tempfile for consistency with the other tests in this file
    # (the original used the py2/py3 compat shim `seven.TemporaryDirectory`).
    with tempfile.TemporaryDirectory() as tmpdir_path:
        asset_store = fs_asset_store.configured({"base_dir": tmpdir_path})
        pipeline_def = define_asset_pipeline(asset_store, {})

        result = execute_pipeline(pipeline_def)
        assert result.success

        # test output_value
        assert result.result_for_solid("solid_a").output_value() == [1, 2, 3]
        assert result.result_for_solid("solid_b").output_value() == 1
def test_fan_in():
    """Fan-in through the asset store: two upstream outputs arrive as one list input."""
    with tempfile.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})

        @solid
        def input_solid1(_):
            return 1

        @solid
        def input_solid2(_):
            return 2

        @solid
        def solid1(_, input1):
            # Both upstream values are collected into a single list.
            assert input1 == [1, 2]

        mode = ModeDefinition(resource_defs={"object_manager": store})

        @pipeline(mode_defs=[mode])
        def my_pipeline():
            solid1(input1=[input_solid1(), input_solid2()])

        execute_pipeline(my_pipeline)
def test_fs_asset_store():
    """The filesystem asset store emits SET/GET events and pickles outputs to disk."""
    with tempfile.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        pipeline_def = define_asset_pipeline(store, {})
        result = execute_pipeline(pipeline_def)
        assert result.success

        op_events = [evt for evt in result.event_list if evt.is_asset_store_operation]
        assert len(op_events) == 3

        def stored_value(step_key):
            # Outputs are pickled at <base_dir>/<run_id>/<step_key>/<output_name>.
            path = os.path.join(base_dir, result.run_id, step_key, "result")
            assert os.path.isfile(path)
            with open(path, "rb") as read_obj:
                return pickle.load(read_obj)

        # SET ASSET for step "solid_a" output "result"
        assert op_events[0].event_specific_data.op == AssetStoreOperationType.SET_ASSET
        assert stored_value("solid_a") == [1, 2, 3]

        # GET ASSET for step "solid_b" input "_df"
        assert op_events[1].event_specific_data.op == AssetStoreOperationType.GET_ASSET
        assert "solid_a" == op_events[1].event_specific_data.step_key

        # SET ASSET for step "solid_b" output "result"
        assert op_events[2].event_specific_data.op == AssetStoreOperationType.SET_ASSET
        assert stored_value("solid_b") == 1
def test_asset_store_optional_output():
    """A skipped optional output makes the downstream solid skip instead of fail."""
    with tempfile.TemporaryDirectory() as tmp_path:
        store = fs_asset_store.configured({"base_dir": tmp_path})
        skip = True

        @solid(output_defs=[OutputDefinition(is_required=False)])
        def solid_a(_context):
            # `skip` is True, so no Output is ever yielded and the
            # optional output stays absent for this run.
            if not skip:
                yield Output([1, 2])

        @solid
        def solid_skipped(_context, array):
            return array

        local_mode = ModeDefinition("local", resource_defs={"asset_store": store})

        @pipeline(mode_defs=[local_mode])
        def asset_pipeline_optional_output():
            solid_skipped(solid_a())

        run_result = execute_pipeline(asset_pipeline_optional_output)
        assert run_result.success
        assert run_result.result_for_solid("solid_skipped").skipped
ModeDefinition,
OutputDefinition,
execute_pipeline,
pipeline,
reexecute_pipeline,
repository,
solid,
)
from dagster.core.storage.asset_store import fs_asset_store


def train(df):
    """Toy training step: the resulting 'model' is just the length of *df*."""
    return len(df)


# Asset store that pickles solid outputs under uncommitted/intermediates/.
local_asset_store = fs_asset_store.configured({"base_dir": "uncommitted/intermediates/"})


@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])
def call_api(_):
    # Placeholder for a real API call; returns a fixed payload.
    return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])
def parse_df(context, df):
    # Log the raw payload, then keep only its first five records.
    context.log.info("{}".format(df))
    result_df = df[:5]
    return result_df


# NOTE(review): decorator for the next solid — its `def` continues past this chunk.
@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])