import os
import pickle
import tempfile

import pytest

from dagster import (
    DagsterInstance,
    DagsterInvariantViolationError,
    ModeDefinition,
    OutputDefinition,
    composite_solid,
    execute_pipeline,
    fs_io_manager,
    graph,
    op,
    pipeline,
    reexecute_pipeline,
    solid,
)
from dagster.core.test_utils import instance_for_test

# NOTE: the import path of AssetStoreOperationType moved between early dagster
# releases; dagster.core.definitions.events is an assumption for the 0.9.x-era
# API that the asset-store tests below exercise.
from dagster.core.definitions.events import AssetStoreOperationType


def test_fs_io_manager():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(io_manager)

        result = execute_pipeline(pipeline_def)
        assert result.success

        handled_output_events = list(
            filter(lambda evt: evt.is_handled_output, result.event_list)
        )
        assert len(handled_output_events) == 2

        filepath_a = os.path.join(tmpdir_path, result.run_id, "solid_a", "result")
        assert os.path.isfile(filepath_a)
        with open(filepath_a, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3]

        loaded_input_events = list(
            filter(lambda evt: evt.is_loaded_input, result.event_list)
        )
        assert len(loaded_input_events) == 1
        assert loaded_input_events[0].event_specific_data.upstream_step_key == "solid_a"

        filepath_b = os.path.join(tmpdir_path, result.run_id, "solid_b", "result")
        assert os.path.isfile(filepath_b)
        with open(filepath_b, "rb") as read_obj:
            assert pickle.load(read_obj) == 1
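# Several tests in this module call define_pipeline(...), a helper defined
# elsewhere in the file. The commented sketch below is an assumption
# reconstructed from the assertions here (solid_a emits [1, 2, 3]; solid_b
# takes an input named "_df" and returns 1); the optional second argument
# seen in some call sites is assumed to be ignorable default config.
#
# def define_pipeline(io_manager_def, _default_solid_config=None):
#     @solid
#     def solid_a(_):
#         return [1, 2, 3]
#
#     @solid
#     def solid_b(_, _df):
#         return 1
#
#     @pipeline(
#         mode_defs=[ModeDefinition(resource_defs={"io_manager": io_manager_def})]
#     )
#     def asset_pipeline():
#         solid_b(solid_a())
#
#     return asset_pipeline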
def test_fan_in_skip():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @solid(output_defs=[OutputDefinition(name="skip", is_required=False)])
        def skip(_):
            # returning before the yield makes this a generator that emits
            # nothing, so the optional "skip" output is never produced
            return
            yield  # pylint: disable=unreachable

        @solid
        def one(_):
            return 1

        @solid
        def receiver(_, input1):
            assert input1 == [1]

        @pipeline(
            mode_defs=[ModeDefinition(resource_defs={"io_manager": default_io_manager})]
        )
        def my_pipeline():
            receiver(input1=[one(), skip()])

        assert execute_pipeline(my_pipeline).success
def test_fs_io_manager_reexecution():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of
        # intermediate events
        get_asset_events = list(
            filter(
                lambda evt: evt.is_asset_store_operation
                and AssetStoreOperationType(evt.event_specific_data.op)
                == AssetStoreOperationType.GET_ASSET,
                re_result.event_list,
            )
        )
        assert len(get_asset_events) == 1
        assert get_asset_events[0].event_specific_data.step_key == "solid_a"
def test_reexecute_subset_of_subset():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        def my_pipeline_def(should_fail):
            @solid
            def one(_):
                return 1

            @solid
            def plus_two(_, i):
                if should_fail:
                    raise Exception()
                return i + 2

            @solid
            def plus_three(_, i):
                return i + 3

            @pipeline(
                mode_defs=[
                    ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})
                ]
            )
            def my_pipeline():
                plus_three(plus_two(one()))

            return my_pipeline

        first_result = execute_pipeline(
            my_pipeline_def(should_fail=True), instance=instance, raise_on_error=False
        )
        assert not first_result.success
        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_two*"],
        )
        assert second_result.success
        assert second_result.result_for_solid("plus_two").output_value() == 3
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_two*"],
        )
        assert third_result.success
        assert third_result.result_for_solid("plus_two").output_value() == 3
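# Step-selection syntax used above, for reference: "plus_two" selects just
# that step, "plus_two*" selects it plus every step downstream of it, and
# "*plus_two" would select it plus everything upstream.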
def test_result_output():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})

        result = execute_pipeline(pipeline_def)
        assert result.success

        # test output_value()
        assert result.result_for_solid("solid_a").output_value() == [1, 2, 3]
        assert result.result_for_solid("solid_b").output_value() == 1
def test_reexecute_subset_of_subset_with_composite():
    @solid
    def one(_):
        return 1

    @solid
    def plus_two(_, i):
        return i + 2

    @composite_solid
    def one_plus_two():
        return plus_two(one())

    @solid
    def plus_three(_, i):
        return i + 3

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @pipeline(
            mode_defs=[ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})]
        )
        def my_pipeline():
            plus_three(one_plus_two())

        first_result = execute_pipeline(my_pipeline, instance=instance)
        assert first_result.success
        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_three"],
        )
        assert second_result.success
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_three"],
        )
        assert third_result.success
def test_fan_in():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @solid
        def input_solid1(_):
            return 1

        @solid
        def input_solid2(_):
            return 2

        @solid
        def solid1(_, input1):
            assert input1 == [1, 2]

        @pipeline(
            mode_defs=[ModeDefinition(resource_defs={"io_manager": default_io_manager})]
        )
        def my_pipeline():
            solid1(input1=[input_solid1(), input_solid2()])

        assert execute_pipeline(my_pipeline).success
def test_fs_io_manager_asset_store_operations():
    # Older-API variant of test_fs_io_manager above (renamed to avoid shadowing
    # it): this one inspects the raw asset_store_operation events rather than
    # the handled-output/loaded-input events.
    with tempfile.TemporaryDirectory() as tmpdir_path:
        io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(io_manager)

        result = execute_pipeline(pipeline_def)
        assert result.success

        asset_store_operation_events = list(
            filter(lambda evt: evt.is_asset_store_operation, result.event_list)
        )
        assert len(asset_store_operation_events) == 3

        # SET_ASSET for step "solid_a" output "result"
        assert (
            asset_store_operation_events[0].event_specific_data.op
            == AssetStoreOperationType.SET_ASSET.value
        )
        filepath_a = os.path.join(tmpdir_path, result.run_id, "solid_a", "result")
        assert os.path.isfile(filepath_a)
        with open(filepath_a, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3]

        # GET_ASSET for step "solid_b" input "_df"
        assert (
            asset_store_operation_events[1].event_specific_data.op
            == AssetStoreOperationType.GET_ASSET.value
        )
        assert asset_store_operation_events[1].event_specific_data.step_key == "solid_a"

        # SET_ASSET for step "solid_b" output "result"
        assert (
            asset_store_operation_events[2].event_specific_data.op
            == AssetStoreOperationType.SET_ASSET.value
        )
        filepath_b = os.path.join(tmpdir_path, result.run_id, "solid_b", "result")
        assert os.path.isfile(filepath_b)
        with open(filepath_b, "rb") as read_obj:
            assert pickle.load(read_obj) == 1
def test_fs_io_manager_unpicklable():
    @op
    def unpicklable_local_func_output():
        # locally defined functions can't be pickled (AttributeError)
        def local_func():
            return 1

        return local_func

    @op
    def unpicklable_lambda_output():
        # lambdas can't be pickled either (AttributeError)
        return lambda x: x

    @op
    def recursion_limit_output():
        # a will exceed the recursion limit of 1000 and can't be pickled
        # (RecursionError)
        a = []
        for _ in range(2000):
            a = [a]
        return a

    @op
    def op_b(_i):
        return 1

    @graph
    def local_func_graph():
        op_b(unpicklable_local_func_output())

    @graph
    def lambda_graph():
        op_b(unpicklable_lambda_output())

    @graph
    def recursion_limit_graph():
        op_b(recursion_limit_output())

    with tempfile.TemporaryDirectory() as tmp_dir:
        with instance_for_test(temp_dir=tmp_dir) as instance:
            io_manager = fs_io_manager.configured({"base_dir": tmp_dir})

            local_func_job = local_func_graph.to_job(
                resource_defs={"io_manager": io_manager}
            )
            with pytest.raises(
                DagsterInvariantViolationError,
                match=r"Object .* is not picklable. .*",
            ):
                local_func_job.execute_in_process(instance=instance)

            lambda_job = lambda_graph.to_job(
                resource_defs={"io_manager": io_manager}
            )
            with pytest.raises(
                DagsterInvariantViolationError,
                match=r"Object .* is not picklable. .*",
            ):
                lambda_job.execute_in_process(instance=instance)

            recursion_job = recursion_limit_graph.to_job(
                resource_defs={"io_manager": io_manager}
            )
            with pytest.raises(
                DagsterInvariantViolationError,
                match=r"Object .* exceeds recursion limit and is not picklable. .*",
            ):
                recursion_job.execute_in_process(instance=instance)
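# For reference, the three failures exercised above can be reproduced with the
# standard library alone. This standalone check is a sketch and not part of the
# original suite; the test name is hypothetical.
def test_stdlib_pickle_failures_sketch():
    def local_func():
        return 1

    # pickle resolves functions by qualified name, so local objects fail
    # (AttributeError from the C pickler, PicklingError from the pure-Python one)
    with pytest.raises((AttributeError, pickle.PicklingError)):
        pickle.dumps(local_func)

    # lambdas fail for the same reason
    with pytest.raises((AttributeError, pickle.PicklingError)):
        pickle.dumps(lambda x: x)

    # deeply nested containers blow the interpreter recursion limit
    deep = []
    for _ in range(2000):
        deep = [deep]
    with pytest.raises(RecursionError):
        pickle.dumps(deep)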