def test_adls2_intermediate_storage_with_custom_serializer(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )

            # The custom serializer upper-cases on write, so the raw stored bytes are "FOO"
            # even though get_intermediate round-trips back to "foo".
            assert (
                intermediate_storage.object_store.file_system_client.get_file_client(
                    os.path.join(intermediate_storage.root, "intermediates", "foo", "result"),
                )
                .download_file()
                .readall()
                .decode("utf-8")
                == "FOO"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
def test_memoized_plan_root_input_manager_input_config():
    @root_input_manager(version="foo", input_config_schema={"my_str": str})
    def my_input_manager():
        return 5

    @solid(
        input_defs=[InputDefinition("x", root_manager_key="my_input_manager")],
        version="foo",
    )
    def my_solid_takes_input(x):
        return x

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(
                        VersionedInMemoryIOManager()
                    ),
                    "my_input_manager": my_input_manager,
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        my_solid_takes_input()

    input_config = {"my_str": "foo"}
    run_config = {"solids": {"my_solid_takes_input": {"inputs": {"x": input_config}}}}

    with instance_for_test() as instance:
        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )

        output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result")
        )
        assert output_version is not None

        input_config["my_str"] = "bar"

        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )

        new_output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result")
        )

        # Ensure that after changing the input config, the version changes.
        assert new_output_version != output_version
def test_resolve_memoized_execution_plan_yes_stored_results():
    manager = VersionedInMemoryIOManager()

    versioned_pipeline = versioned_pipeline_factory(manager)
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step_output_version = speculative_execution_plan.resolve_step_output_versions()[
        step_output_handle
    ]
    # Pre-populate the IO manager so the upstream output counts as memoized.
    manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    with DagsterInstance.ephemeral() as dagster_instance:
        memoized_execution_plan = resolve_memoized_execution_plan(
            speculative_execution_plan, {}, dagster_instance
        )

        # Only the downstream step still needs to execute.
        assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        expected_handle = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )

        assert (
            memoized_execution_plan.get_step_by_key("versioned_solid_takes_input")
            .step_input_dict["intput"]  # "intput" is the input name defined on the fixture solid
            .source.step_output_handle
            == expected_handle
        )
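# For readability of the memoization tests above, an assumed shape for the
# versioned fixture pipeline (version strings and solid bodies are assumptions;
# the solid names and the "intput" input name come from the assertions above):
#
#   @solid(version="...")
#   def versioned_solid_no_input(_):
#       return 4
#
#   @solid(version="...")
#   def versioned_solid_takes_input(_, intput):
#       return 2 * intput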
def test_adls2_intermediate_storage_with_custom_prefix(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        prefix="custom_prefix",
    )
    assert intermediate_storage.root == "/".join(["custom_prefix", "storage", run_id])

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle("true"), True
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("true"))
            assert intermediate_storage.uri_for_paths(["true"]).startswith(
                "abfss://{fs}@{account}.dfs.core.windows.net/custom_prefix".format(
                    account=storage_account, fs=file_system
                )
            )
    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle("true"))
def test_gcs_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id, gcs_bucket=gcs_bucket)

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]), StepOutputHandle(obj_name)
            ).obj == ["foo", "bar"]
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_gcs_intermediate_storage_with_custom_prefix(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id, gcs_bucket=gcs_bucket, gcs_prefix="custom_prefix"
    )
    assert intermediate_storage.root == "/".join(["custom_prefix", "storage", run_id])

    obj_name = "true"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context, RuntimeBool, StepOutputHandle(obj_name), True
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.uri_for_paths([obj_name]).startswith(
                "gs://%s/custom_prefix" % gcs_bucket
            )
    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_adls2_intermediate_storage_composite_types_with_custom_serializer_for_inner_type(
    storage_account, file_system
):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
    )

    obj_name = "list"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(List[LowercaseString]),
                StepOutputHandle(obj_name),
                ["foo", "bar"],
            )
            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert intermediate_storage.get_intermediate(
                context, resolve_dagster_type(List[Bool]), StepOutputHandle(obj_name)
            ).obj == ["foo", "bar"]
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_gcs_intermediate_storage_with_type_storage_plugin(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(
        run_id=run_id,
        gcs_bucket=gcs_bucket,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringGCSTypeStoragePlugin)]
        ),
    )

    obj_name = "obj_name"

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle(obj_name), "hello"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            # With a type storage plugin, get_intermediate returns the value directly
            # rather than a result wrapper with an .obj attribute.
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle(obj_name)
                )
                == "hello"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_s3_intermediate_storage_with_custom_serializer(mock_s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(run_id=run_id, s3_bucket=mock_s3_bucket.name)

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, LowercaseString, StepOutputHandle("foo"), "foo"
            )

            assert (
                intermediate_storage.object_store.s3.get_object(
                    Bucket=intermediate_storage.object_store.bucket,
                    Key=os.path.join(intermediate_storage.root, "intermediates", "foo", "result"),
                )["Body"]
                .read()
                .decode("utf-8")
                == "FOO"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("foo"))
            assert (
                intermediate_storage.get_intermediate(
                    context, LowercaseString, StepOutputHandle("foo")
                ).obj
                == "foo"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("foo"))
def test_s3_intermediate_storage_with_type_storage_plugin(mock_s3_bucket):
    run_id = make_new_run_id()

    intermediate_storage = S3IntermediateStorage(
        run_id=run_id,
        s3_bucket=mock_s3_bucket.name,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle("obj_name"), "hello"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("obj_name"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle("obj_name")
                )
                == "hello"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("obj_name"))
def test_custom_read_write_mode(gcs_bucket):
    run_id = make_new_run_id()

    intermediate_storage = GCSIntermediateStorage(run_id=run_id, gcs_bucket=gcs_bucket)

    data_frame = [
        OrderedDict({"foo": "1", "bar": "1"}),
        OrderedDict({"foo": "2", "bar": "2"}),
    ]
    obj_name = "data_frame"

    try:
        with yield_empty_pipeline_context(run_id=run_id) as context:
            intermediate_storage.set_intermediate(
                context,
                resolve_dagster_type(LessSimpleDataFrame),
                StepOutputHandle(obj_name),
                data_frame,
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle(obj_name))
            assert (
                intermediate_storage.get_intermediate(
                    context, resolve_dagster_type(LessSimpleDataFrame), StepOutputHandle(obj_name)
                ).obj
                == data_frame
            )
            assert intermediate_storage.uri_for_paths([obj_name]).startswith("gs://")
    finally:
        intermediate_storage.rm_intermediate(context, StepOutputHandle(obj_name))
def test_adls2_intermediate_storage_with_type_storage_plugin(storage_account, file_system):
    run_id = make_new_run_id()

    intermediate_storage = ADLS2IntermediateStorage(
        adls2_client=get_adls2_client(storage_account),
        blob_client=get_blob_client(storage_account),
        run_id=run_id,
        file_system=file_system,
        type_storage_plugin_registry=TypeStoragePluginRegistry(
            [(RuntimeString, FancyStringS3TypeStoragePlugin)]
        ),
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_storage.set_intermediate(
                context, RuntimeString, StepOutputHandle("obj_name"), "hello"
            )

            assert intermediate_storage.has_intermediate(context, StepOutputHandle("obj_name"))
            assert (
                intermediate_storage.get_intermediate(
                    context, RuntimeString, StepOutputHandle("obj_name")
                )
                == "hello"
            )
        finally:
            intermediate_storage.rm_intermediate(context, StepOutputHandle("obj_name"))
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id
        )
        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
        assert (
            intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj
            == 1
        )

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
        assert (
            intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
        )
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )
    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    # re-execute add_two
    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two"]),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential(),
                }
            }
        },
        "intermediate_storage": {
            "adls2": {
                "config": {
                    "adls2_file_system": file_system,
                    "adls2_prefix": adls2_prefix,
                }
            }
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
        execution_plan,
        InMemoryPipeline(pipe),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}
        ).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        assert intermediate_storage.root == "/".join(
            ["custom_prefix", "storage", result.run_id]
        )
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("return_one")).obj
            == 1
        )
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("add_one")).obj
            == 2
        )
def test_failing_execution_plan():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    with plan.start(retry_mode=RetryMode.DISABLED) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        # uh oh, failure
        active_execution.mark_failed(step_3.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_3.key, "result"))

        # can't progress to the 4th step
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0

        assert not active_execution.is_complete

        steps = active_execution.get_steps_to_abandon()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"
        active_execution.mark_abandoned(step_4.key)

        assert active_execution.is_complete
def test_file_system_intermediate_storage_composite_types():
    _, _, intermediate_storage = define_intermediate_storage()

    assert intermediate_storage.set_intermediate(
        None, List[Bool], StepOutputHandle("return_true_lst"), [True]
    )
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_true_lst"))
    assert intermediate_storage.get_intermediate(
        None, List[Bool], StepOutputHandle("return_true_lst")
    ).obj == [True]
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
def test_resolve_step_output_versions_no_external_dependencies():
    versioned_pipeline = versioned_pipeline_factory()
    speculative_execution_plan = create_execution_plan(
        versioned_pipeline, run_config={}, mode="main"
    )
    versions = speculative_execution_plan.resolve_step_output_versions()

    assert (
        versions[StepOutputHandle("versioned_solid_no_input", "result")]
        == versioned_pipeline_expected_step1_output_version()
    )
    assert (
        versions[StepOutputHandle("versioned_solid_takes_input", "result")]
        == versioned_pipeline_expected_step2_output_version()
    )
def test_active_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_3.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_3.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        assert not active_execution.is_complete

        active_execution.mark_success(step_4.key)

        assert active_execution.is_complete
def test_file_system_intermediate_storage_composite_types_with_custom_serializer_for_inner_type():
    run_id, instance, intermediate_storage = define_intermediate_storage()
    with yield_empty_pipeline_context(run_id=run_id, instance=instance) as context:
        intermediate_storage.set_intermediate(
            context,
            resolve_dagster_type(List[LowercaseString]),
            StepOutputHandle("baz"),
            ["list"],
        )
        assert intermediate_storage.has_intermediate(context, StepOutputHandle("baz"))
        assert intermediate_storage.get_intermediate(
            context, resolve_dagster_type(List[Bool]), StepOutputHandle("baz")
        ).obj == ["list"]
def test_resolve_memoized_execution_plan_partial_versioning():
    manager = VersionedInMemoryIOManager()

    partially_versioned_pipeline = partially_versioned_pipeline_factory(manager)
    speculative_execution_plan = create_execution_plan(partially_versioned_pipeline)
    resolved_run_config = ResolvedRunConfig.build(partially_versioned_pipeline)

    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")

    step_output_version = resolve_step_output_versions(
        partially_versioned_pipeline, speculative_execution_plan, resolved_run_config
    )[step_output_handle]
    manager.values[
        (step_output_handle.step_key, step_output_handle.output_name, step_output_version)
    ] = 4

    with DagsterInstance.ephemeral() as instance:
        assert resolve_memoized_execution_plan(
            speculative_execution_plan,
            partially_versioned_pipeline,
            {},
            instance,
            resolved_run_config,
        ).step_keys_to_execute == ["solid_takes_input"]
def test_construct_log_string_for_event():
    step_output_event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=SolidHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepOutputData(
            step_output_handle=StepOutputHandle("solid2", "result")
        ),
        message='Yielded output "result" of type "Any" for step "solid2". (Type check passed).',
        pid=54348,
    )
    message_props = {"dagster_event": step_output_event, "pipeline_name": "my_pipeline"}

    synth_props = {
        "orig_message": step_output_event.message,
        "run_id": "f79a8a93-27f1-41b5-b465-b35d0809b26d",
    }
    assert (
        construct_log_string(
            message_props=message_props, logging_tags={}, synth_props=synth_props
        )
        == 'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    )
def _type_check_and_store_output(
    step_context: SystemStepExecutionContext, output: Union[DynamicOutput, Output]
) -> Iterator[DagsterEvent]:
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    mapping_key = output.mapping_key if isinstance(output, DynamicOutput) else None

    step_output_handle = StepOutputHandle(
        step_key=step_context.step.key, output_name=output.output_name, mapping_key=mapping_key
    )

    version = (
        step_context.execution_plan.resolve_step_output_versions().get(step_output_handle)
        if MEMOIZED_RUN_TAG in step_context.pipeline.get_definition().tags
        else None
    )

    for output_event in _type_check_output(step_context, step_output_handle, output, version):
        yield output_event

    for evt in _store_output(step_context, step_output_handle, output):
        yield evt

    for evt in _create_type_materializations(step_context, output.output_name, output.value):
        yield evt
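# Versions are only resolved above when MEMOIZED_RUN_TAG appears in the
# pipeline's tags. A minimal sketch of opting in, mirroring the memoization
# tests in this section (the solid body and io_manager wiring are illustrative
# assumptions, not taken from this function's call sites):
#
#   @solid(version="example")
#   def _example_versioned_solid():
#       return 1
#
#   @pipeline(
#       mode_defs=[
#           ModeDefinition(
#               resource_defs={
#                   "io_manager": IOManagerDefinition.hardcoded_io_manager(
#                       VersionedInMemoryIOManager()
#                   )
#               }
#           )
#       ],
#       tags={MEMOIZED_RUN_TAG: "true"},
#   )
#   def _example_memoized_pipeline():
#       _example_versioned_solid()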
def yield_result(self, value, output_name="result"):
    """Yield a result directly from notebook code.

    When called interactively or in development, returns its input.

    Args:
        value (Any): The value to yield.
        output_name (Optional[str]): The name of the result to yield (default: ``'result'``).
    """
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    if not self.solid_def.has_output(output_name):
        raise DagstermillError(
            f"Solid {self.solid_def.name} does not have output named {output_name}. "
            f"Expected one of {[str(output_def.name) for output_def in self.solid_def.output_defs]}"
        )

    # pass output value across the process boundary using the io manager
    step_context = self.context._step_context  # pylint: disable=protected-access
    # Note: yield_result currently does not support DynamicOutput
    step_output_handle = StepOutputHandle(
        step_key=step_context.step.key, output_name=output_name
    )
    output_context = step_context.get_output_context(step_output_handle)
    io_manager = step_context.get_io_manager(step_output_handle)

    # Note that we assume the io manager is symmetric, i.e. handle_input(handle_output(X)) == X
    io_manager.handle_output(output_context, value)

    # record that the output has been yielded
    scrapbook.glue(output_name, "")
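# For context, the usual path into yield_result from notebook code is the
# module-level dagstermill entry point, which delegates to this method on the
# active context. A minimal sketch of assumed notebook usage (not part of this
# file):
#
#   import dagstermill
#
#   dagstermill.yield_result(3, output_name="result")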
def handle_output(self, context, obj):
    res = self.intermediate_storage.set_intermediate(
        context=context.step_context,
        dagster_type=context.dagster_type,
        step_output_handle=StepOutputHandle(
            context.step_key, context.name, context.mapping_key
        ),
        value=obj,
        version=context.version,
    )

    # Stopgap https://github.com/dagster-io/dagster/issues/3368
    if isinstance(res, ObjectStoreOperation):
        context.log.debug(
            (
                'Stored output "{output_name}" in {object_store_name}object store'
                "{serialization_strategy_modifier} at {address}"
            ).format(
                output_name=context.name,
                object_store_name=res.object_store_name,
                serialization_strategy_modifier=(
                    " using {serialization_strategy_name}".format(
                        serialization_strategy_name=res.serialization_strategy_name
                    )
                    if res.serialization_strategy_name
                    else ""
                ),
                address=res.key,
            )
        )
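# A minimal sketch of the read-side counterpart this adapter presumably pairs
# with, so the StepOutputHandle round-trip is visible end to end. The method
# body is an assumption; only the set_intermediate/get_intermediate symmetry
# and the .obj access pattern are taken from this section:
#
#   def load_input(self, context):
#       upstream = context.upstream_output
#       return self.intermediate_storage.get_intermediate(
#           context=context.step_context,
#           dagster_type=context.dagster_type,
#           step_output_handle=StepOutputHandle(
#               upstream.step_key, upstream.name, upstream.mapping_key
#           ),
#       ).obj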
def test_construct_log_string_for_event():
    step_output_event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepOutputData(
            step_output_handle=StepOutputHandle("solid2", "result")
        ),
        message='Yielded output "result" of type "Any" for step "solid2". (Type check passed).',
        pid=54348,
    )

    logging_metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d", pipeline_name="my_pipeline"
    )
    dagster_message_props = DagsterMessageProps(
        orig_message=step_output_event.message,
        dagster_event=step_output_event,
    )
    assert (
        construct_log_string(
            logging_metadata=logging_metadata, message_props=dagster_message_props
        )
        == 'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    )
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"]),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    # intermediate_storage overrides storage, so nothing lands on the filesystem
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
def logged_solid(context):
    context.get_system_context().get_output_context(
        StepOutputHandle("logged_solid", "result")
    ).log.debug("test OUTPUT debug logging from logged_solid.")
    context.get_system_context().for_input_manager(
        "logged_solid", {}, {}, None, source_handle=None
    ).log.debug("test INPUT debug logging from logged_solid.")
def test_get_output_context_with_resources():
    @solid
    def basic_solid():
        pass

    @pipeline
    def basic_pipeline():
        basic_solid()

    with pytest.raises(
        CheckError,
        match="Expected either resources or step context to be set, but "
        "received both. If step context is provided, resources for IO manager will be "
        "retrieved off of that.",
    ):
        get_output_context(
            execution_plan=create_execution_plan(basic_pipeline),
            pipeline_def=basic_pipeline,
            resolved_run_config=ResolvedRunConfig.build(basic_pipeline),
            step_output_handle=StepOutputHandle("basic_solid", "result"),
            run_id=None,
            log_manager=None,
            step_context=mock.MagicMock(),
            resources=mock.MagicMock(),
        )
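# For contrast, a call that should pass the check supplies at most one of
# step_context / resources. A minimal sketch of the resources-only form
# (argument values mirror the test above; that this combination passes is an
# assumption inferred from the error message being tested):
#
#   get_output_context(
#       execution_plan=create_execution_plan(basic_pipeline),
#       pipeline_def=basic_pipeline,
#       resolved_run_config=ResolvedRunConfig.build(basic_pipeline),
#       step_output_handle=StepOutputHandle("basic_solid", "result"),
#       run_id=None,
#       log_manager=None,
#       step_context=None,
#       resources=mock.MagicMock(),
#   )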