def test_my_io_manager_load_input():
    my_io_manager = MyIOManager()
    my_io_manager.storage_dict[("123", "abc")] = 5

    context = InputContext(upstream_output=OutputContext(name="abc", step_key="123"))
    assert my_io_manager.load_input(context) == 5
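# A minimal sketch of the MyIOManager under test, inferred from the assertions
# in the tests above and below: an in-memory IOManager whose storage_dict is
# keyed by (step_key, output name). The real definition lives elsewhere in this
# suite; treat this reconstruction as an assumption, not the canonical code.
from dagster import IOManager


class MyIOManager(IOManager):
    def __init__(self):
        self.storage_dict = {}

    def handle_output(self, context, obj):
        # Store the output under the (step, output) pair that produced it.
        self.storage_dict[(context.step_key, context.name)] = obj

    def load_input(self, context):
        # Look the value back up via the upstream output's coordinates.
        return self.storage_dict[
            (context.upstream_output.step_key, context.upstream_output.name)
        ]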
def test_handle_output_then_load_input():
    snowflake_config = generate_snowflake_config()
    snowflake_manager = SnowflakeIOManager()
    contents1 = DataFrame([{"col1": "a", "col2": 1}])  # just to get the types right
    contents2 = DataFrame([{"col1": "b", "col2": 2}])  # contents we will insert

    with temporary_snowflake_table(contents1) as temp_table_name:
        metadata = {
            "table": f"public.{temp_table_name}",
        }
        output_context = OutputContext(
            step_key="a",
            name="result",
            pipeline_name="fake_pipeline",
            metadata=metadata,
            resource_config=snowflake_config,
        )

        list(snowflake_manager.handle_output(output_context, contents2))  # exhaust the iterator

        input_context = InputContext(
            upstream_output=output_context, resource_config=snowflake_config
        )
        input_value = snowflake_manager.load_input(input_context)
        assert input_value.equals(contents2), f"{input_value}\n\n{contents2}"
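# temporary_snowflake_table is a helper defined elsewhere in this suite; a
# plausible sketch follows. It creates an empty table with the column types of
# the given DataFrame and drops it on exit. connect_snowflake is assumed here
# to be the same connection helper SnowflakeIOManager uses internally; every
# name in this sketch is an assumption rather than the canonical helper.
from contextlib import contextmanager
from uuid import uuid4


@contextmanager
def temporary_snowflake_table(contents):
    # Prefix with a letter so the generated name is a valid table identifier.
    table_name = "a" + str(uuid4()).replace("-", "_")
    with connect_snowflake(config=generate_snowflake_config()) as con:
        # head(0) creates the table with the right schema but no rows.
        contents.head(0).to_sql(name=table_name, con=con, index=False)
        try:
            yield table_name
        finally:
            con.execute(f"drop table public.{table_name}")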
def test_s3_object_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"object_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    object_manager = PickledObjectS3ObjectManager(
        mock_s3_bucket.name, construct_s3_client(max_attempts=5), s3_prefix="dagster"
    )
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        config={},
        metadata={},
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            metadata={},
            mapping_key=None,
            config=None,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2
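# define_inty_pipeline and get_step_output are shared fixtures used by the
# execution tests in this file; their definitions are not shown. A sketch under
# that assumption: define_inty_pipeline builds a two-solid pipeline
# (return_one -> add_one), which is why the tests assert loaded values of 1 and
# 2, and get_step_output scans an event stream for a step's STEP_OUTPUT event.
# The real fixture also attaches a ModeDefinition wiring up the storage
# resource ("object_manager" / "io_manager"), omitted here for brevity.
from dagster import (
    DagsterEventType,
    InputDefinition,
    Int,
    OutputDefinition,
    pipeline,
    solid,
)


def define_inty_pipeline():
    @solid(output_defs=[OutputDefinition(Int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition("num", Int)],
        output_defs=[OutputDefinition(Int)],
    )
    def add_one(_, num):
        return num + 1

    @pipeline
    def basic_external_plan_execution():
        add_one(return_one())

    return basic_external_plan_execution


def get_step_output(step_events, step_key, output_name="result"):
    # Return the STEP_OUTPUT event for the given step and output, if any.
    for step_event in step_events:
        if (
            step_event.event_type == DagsterEventType.STEP_OUTPUT
            and step_event.step_key == step_key
            and step_event.step_output_data.output_name == output_name
        ):
            return step_event
    return None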
def test_my_io_manager_handle_output():
    my_io_manager = MyIOManager()
    context = OutputContext(name="abc", step_key="123")
    my_io_manager.handle_output(context, 5)
    assert my_io_manager.storage_dict[("123", "abc")] == 5
def test_gcs_pickle_io_manager_execution(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "gcs_bucket": gcs_bucket,
                }
            }
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[]),
    )
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
        log_manager=DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[]),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2
def test_adls2_object_manager_execution(storage_account, file_system, credential):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "object_manager": {"config": {"adls2_file_system": file_system}},
            "adls2": {
                "config": {"storage_account": storage_account, "credential": {"key": credential}}
            },
        }
    }

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")

    step_output_handle = StepOutputHandle("return_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("return_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("return_one"),
        ),
    )

    object_manager = PickledObjectADLS2ObjectManager(
        file_system=file_system,
        adls2_client=create_adls2_client(storage_account, credential),
        blob_client=create_blob_client(storage_account, credential),
    )
    assert object_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"]),
            pipeline_run=pipeline_run,
            run_config=run_config,
            instance=instance,
        )
    )

    step_output_handle = StepOutputHandle("add_one")
    context = InputContext(
        pipeline_name=pipeline_def.name,
        solid_def=pipeline_def.solid_def_named("add_one"),
        upstream_output=OutputContext(
            step_key=step_output_handle.step_key,
            name=step_output_handle.output_name,
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            solid_def=pipeline_def.solid_def_named("add_one"),
        ),
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert object_manager.load_input(context) == 2