def yield_empty_pipeline_context(run_id=None, instance=None):
    """Yield a scoped pipeline context for a pipeline with no solids.

    Generator function (expected to be consumed as a context manager by
    callers): creates a run record named ``<empty>`` and opens a
    ``scoped_pipeline_context`` around it.

    Args:
        run_id (Optional[str]): Run id to assign to the created run.
        instance (Optional[DagsterInstance]): Instance to record the run on;
            defaults to a fresh ephemeral instance.
    """
    pipeline = InMemoryPipeline(PipelineDefinition([]))
    pipeline_def = pipeline.get_definition()
    # Fall back to an ephemeral instance when none is supplied.
    instance = check.opt_inst_param(
        instance, "instance", DagsterInstance, default=DagsterInstance.ephemeral()
    )
    execution_plan = create_execution_plan(pipeline)
    pipeline_run = instance.create_run(
        pipeline_name="<empty>",
        run_id=run_id,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
    with scoped_pipeline_context(execution_plan, {}, pipeline_run, instance) as context:
        yield context
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing a subset of a run whose intermediates were in-memory must raise.

    The parent run's outputs were never persisted, so subset-executing
    ``add_two`` as a child run is expected to fail with
    ``DagsterInvariantViolationError``.
    """
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)
    assert result.success

    # Re-execute only add_two as a child run of the first execution.
    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )
    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_using_file_system_for_subplan_missing_input():
    """Subset-executing a step whose upstream output was never produced fails that step."""
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    # "add_one" depends on "return_one", which has not been executed, so the
    # step is expected to fail rather than the whole call raising.
    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[0].event_specific_data.error.message
def test_using_file_system_for_subplan_invalid_step():
    """Building a subset plan with an unknown step key raises DagsterExecutionStepNotFoundError."""
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
def test_execute_canceled_state():
    """Runs already in CANCELED state must not start executing.

    Both ``execute_run`` and ``execute_run_iterator`` are expected to produce
    exactly one "Not starting execution" message and nothing else.
    """

    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)

        logs = instance.all_logs(pipeline_run.run_id)
        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message
        )

        # Same expectation for the iterator-based entry point.
        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def), iter_run, instance=instance)
        )
        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message
        )
def test_using_file_system_for_subplan_missing_input():
    """Subset-executing "add_one" without its upstream output fails input loading.

    NOTE(review): shares its name with another variant of this test elsewhere;
    presumably they live in different modules — verify there is no shadowing.
    """
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(pipeline,)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    # "return_one" never ran, so add_one's input cannot be loaded from disk.
    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterExecutionLoadInputError" in failures[0].event_specific_data.error.message
def test_subset_for_execution():
    """Selecting "*add_nums" resolves to add_nums plus both of its upstream solids."""
    full_pipeline = InMemoryPipeline(foo_pipeline)

    subset = full_pipeline.subset_for_execution(["*add_nums"])

    # The selection string is preserved verbatim, while solids_to_execute is
    # the resolved transitive-upstream set.
    assert subset.solid_selection == ["*add_nums"]
    assert subset.solids_to_execute == {"add_nums", "return_one", "return_two"}

    assert execute_pipeline(subset).success
def test_execute_run_iterator():
    """Closing execute_run_iterator mid-run fails the pipeline but still tears down resources.

    Also verifies that handing an already-SUCCESS run back to the iterator
    raises a CheckError.
    """
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        )

        iterator = execute_run_iterator(
            InMemoryPipeline(pipeline_def), pipeline_run, instance=instance
        )

        # Drive the iterator until the first step starts, then abandon it.
        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        events = [record.dagster_event for record in records if record.is_dagster_event]
        messages = [record.user_message for record in records if not record.is_dagster_event]
        pipeline_failure_events = [event for event in events if event.is_pipeline_failure]
        assert len(pipeline_failure_events) == 1
        # Closing a running generator raises GeneratorExit inside it; that
        # surfaces in the failure event's error message.
        assert "GeneratorExit" in pipeline_failure_events[0].pipeline_failure_data.error.message
        # Resource teardown still ran despite the early close.
        assert len([message for message in messages if message == "CLEANING A"]) > 0
        assert len([message for message in messages if message == "CLEANING B"]) > 0

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
def test_using_intermediates_file_system_for_subplan():
    """Step subsets persist and read values through filesystem intermediate storage."""
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    # Run only "return_one" and check that its output landed in storage.
    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj == 1

    # "add_one" reads the persisted upstream value and writes its own output.
    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
def create_test_pipeline_execution_context(logger_defs=None):
    """Construct a PlanExecutionContext for an empty single-mode test pipeline.

    Args:
        logger_defs (Optional[Dict[str, LoggerDefinition]]): Loggers to install
            on the test mode; each is enabled with empty config in the run config.

    Returns:
        PlanExecutionContext: A context backed by an ephemeral instance and
        in-memory intermediate storage.
    """
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(
        InMemoryPipeline(pipeline_def), execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    # No resources are configured, so an empty builder suffices.
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return PlanExecutionContext(
        plan_data=create_plan_data(creation_data, True, executor.retries),
        execution_data=create_execution_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
        ),
        log_manager=log_manager,
        output_capture=None,
    )
def test_reentrant_execute_plan():
    """Run tags are visible inside solids and surface in step-event logging tags."""
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag("foo")
        assert context.get_tag("foo") == "bar"
        called["yup"] = True

    pipeline_def = PipelineDefinition(name="has_tag_pipeline", solid_defs=[has_tag])
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, tags={"foo": "bar"}, execution_plan=execution_plan
    )
    step_events = execute_plan(
        execution_plan, InMemoryPipeline(pipeline_def), pipeline_run=pipeline_run, instance=instance
    )

    assert called["yup"]
    # Tags are rendered as a stringified dict on the logging tags.
    assert (
        find_events(step_events, event_type="STEP_OUTPUT")[0].logging_tags["pipeline_tags"]
        == "{'foo': 'bar'}"
    )
def test_execution_plan_simple_two_steps():
    """A two-solid linear pipeline plans two steps that run and emit outputs in order."""
    pipeline_def = define_two_int_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key("return_one")
    assert execution_plan.get_step_by_key("add_one")

    events = execute_plan(
        execution_plan, InMemoryPipeline(pipeline_def), pipeline_run=pipeline_run, instance=instance
    )
    step_starts = find_events(events, event_type="STEP_START")
    assert len(step_starts) == 2
    step_successes = find_events(events, event_type="STEP_SUCCESS")
    assert len(step_successes) == 2

    output_events = find_events(events, event_type="STEP_OUTPUT")

    # Topological order: producer before consumer.
    assert output_events[0].step_key == "return_one"
    assert output_events[0].is_successful_output
    assert output_events[1].step_key == "add_one"
    assert output_events[1].is_successful_output
def test_execution_plan_wrong_run_id():
    """Referencing a nonexistent parent run id fails with DagsterRunNotFoundError."""
    pipeline_def = define_addy_pipeline(using_file_system=True)

    unrun_id = "not_a_run"
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}

    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline_def, run_config=run_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=unrun_id,
        root_run_id=unrun_id,
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )

    assert str(exc_info.value) == "Run id {} set as parent run id was not found in instance".format(
        unrun_id
    )

    # The exception carries the offending run id for programmatic handling.
    assert exc_info.value.invalid_run_id == unrun_id
def core_execute_in_process(
    node: NodeDefinition,
    run_config: Dict[str, Any],
    ephemeral_pipeline: PipelineDefinition,
    instance: Optional[DagsterInstance],
    output_capturing_enabled: bool,
    raise_on_error: bool,
    run_tags: Optional[Dict[str, Any]] = None,
    run_id: Optional[str] = None,
) -> ExecuteInProcessResult:
    """Execute an ephemeral pipeline in process and collect its events.

    Args:
        node: The node definition being executed; passed through to the result.
        run_config: Run configuration for the execution.
        ephemeral_pipeline: The pipeline wrapping ``node``.
        instance: Instance to execute against; an ephemeral one is created if None.
        output_capturing_enabled: When True, step outputs are captured into the result.
        raise_on_error: Whether errors are re-raised (passed to the orchestration
            context manager).
        run_tags: Extra tags merged over the pipeline's own tags.
        run_id: Optional explicit run id for the created run.

    Returns:
        ExecuteInProcessResult: the node, the event list, the finished run
        record, and any captured outputs.
    """
    pipeline_def = ephemeral_pipeline
    mode_def = pipeline_def.get_mode_definition()
    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
        mode=mode_def.name,
        # Only pass an instance ref when the instance is persistent.
        instance_ref=instance.get_ref() if instance and instance.is_persistent else None,
    )

    output_capture: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
            tags={**pipeline_def.tags, **(run_tags or {})},
            run_id=run_id,
        )
        run_id = pipeline_run.run_id

        execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PlanOrchestrationContextManager(
                context_event_generator=orchestration_context_event_generator,
                pipeline=pipeline,
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
                executor_defs=None,
                output_capture=output_capture if output_capturing_enabled else None,
                raise_on_error=raise_on_error,
            ),
        )
        event_list = []
        for event in execute_run_iterable:
            event_list.append(event)
            # Mirror pipeline-level events onto the run record as they occur.
            if event.is_pipeline_event:
                execute_instance.handle_run_event(run_id, event)

        # Read the run back while the (possibly ephemeral) instance is still open.
        return ExecuteInProcessResult(
            node, event_list, execute_instance.get_run_by_id(run_id), output_capture
        )
def test_execute_run_bad_state():
    """execute_run refuses a run that is not in NOT_STARTED state."""
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        # Force the run into SUCCESS before attempting to execute it.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
def test_execution_plan_two_outputs():
    """A solid with two named outputs yields a STEP_OUTPUT event for each, in yield order."""

    @solid(
        output_defs=[
            OutputDefinition(Int, "num_one"),
            OutputDefinition(Int, "num_two"),
        ]
    )
    def return_one_two(_context):
        yield Output(1, "num_one")
        yield Output(2, "num_two")

    pipeline_def = PipelineDefinition(name="return_one_two_pipeline", solid_defs=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )
    events = execute_plan(
        execution_plan, InMemoryPipeline(pipeline_def), pipeline_run=pipeline_run, instance=instance
    )

    output_events = find_events(events, event_type="STEP_OUTPUT")
    assert output_events[0].step_key == "return_one_two"
    assert output_events[0].step_output_data.output_name == "num_one"
    assert output_events[1].step_key == "return_one_two"
    assert output_events[1].step_output_data.output_name == "num_two"
def _synthesize_events(solids_fn, run_id=None, check_success=True):
    """Run an ad-hoc pipeline wired by *solids_fn* and capture its logged events.

    Args:
        solids_fn (Callable): Called inside the @pipeline body to compose solids.
        run_id (Optional[str]): Run id for the created run.
        check_success (bool): When True, assert the run succeeded before returning.

    Returns:
        Tuple[List, Any]: (captured event records, execution result).
    """
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            a_pipe, run_id=run_id, run_config={"loggers": {"callback": {}, "console": {}}}
        )

        result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

        if check_success:
            assert result.success

        return events, result
def test_compile():
    """The composed pipeline's plan coalesces into the expected set of step keys."""
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    # Composite solids flatten into dotted step keys.
    assert set(res.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
def synthesize_events(solids_fn, run_id=None):
    """Run an ad-hoc pipeline wired by *solids_fn* on a local temp instance.

    Args:
        solids_fn (Callable): Called inside the @pipeline body to compose solids.
        run_id (Optional[str]): Run id for the created run.

    Returns:
        Tuple[List, Any]: (captured event records, execution result).
    """
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    # NOTE(review): the local_temp instance is never disposed here — confirm
    # callers tolerate the leftover temp directory.
    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe, run_id=run_id, run_config={"loggers": {"callback": {}, "console": {}}}
    )

    result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

    assert result.success

    return events, result
def _check_pipeline(pipeline):
    """Normalize *pipeline* to an IPipeline, wrapping bare definitions (backcompat)."""
    normalized = (
        InMemoryPipeline(pipeline) if isinstance(pipeline, PipelineDefinition) else pipeline
    )
    check.inst_param(normalized, "pipeline", IPipeline)
    return normalized
def test_single_step_resource_event_logs():
    """Resource-initialization logs are attributed to the single step of a sub-plan.

    Single-step plans are often the representation of sub-plans in a
    multiprocessing execution environment. Most likely will need to be
    rewritten with the refactor detailed in
    https://github.com/dagster-io/dagster/issues/2239
    """
    USER_SOLID_MESSAGE = "I AM A SOLID"
    USER_RESOURCE_MESSAGE = "I AM A RESOURCE"
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={"a"})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return "A"

    the_pipeline = PipelineDefinition(
        name="resource_logging_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"a": resource_a},
                logger_defs={"callback": construct_event_logger(event_callback)},
            )
        ],
    )

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            the_pipeline,
            run_config={"loggers": {"callback": {}}},
            step_keys_to_execute=["resource_solid"],
        )

        result = execute_run(InMemoryPipeline(the_pipeline), pipeline_run, instance)

        assert result.success
        log_messages = [
            event
            for event in events
            if isinstance(event, EventRecord) and event.level == coerce_valid_log_level("INFO")
        ]
        # One message from the solid, one from the resource.
        assert len(log_messages) == 2

        resource_log_message = next(
            iter(
                [
                    message
                    for message in log_messages
                    if message.user_message == USER_RESOURCE_MESSAGE
                ]
            )
        )
        assert resource_log_message.step_key == "resource_solid"
def test_clean_event_generator_exit():
    """Testing for generator cleanup
    (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/)

    Each execution-related generator is advanced one step and then closed;
    none of the close() calls should raise.
    """
    from dagster.core.execution.context.init import InitResourceContext
    from dagster.core.definitions.resource import ScopedResourcesBuilder

    pipeline_def = gen_basic_resource_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )
    log_manager = DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[])
    resolved_run_config = ResolvedRunConfig.build(pipeline_def)
    execution_plan = create_execution_plan(pipeline_def)

    resource_name, resource_def = next(iter(pipeline_def.get_default_mode().resource_defs.items()))
    resource_context = InitResourceContext(
        resource_def=resource_def,
        resources=ScopedResourcesBuilder().build(None),
        resource_config=None,
        pipeline_run=pipeline_run,
        instance=instance,
    )
    # Single-resource generator: advance once, then close.
    generator = single_resource_event_generator(resource_context, resource_name, resource_def)
    next(generator)
    generator.close()

    resource_defs = pipeline_def.get_mode_definition(resolved_run_config.mode)

    # Full resource-initialization generator: advance once, then close.
    generator = resource_initialization_event_generator(
        resource_defs=resource_defs,
        resource_configs=resolved_run_config.resources,
        log_manager=log_manager,
        execution_plan=execution_plan,
        pipeline_run=pipeline_run,
        resource_keys_to_init={"a"},
        instance=instance,
        emit_persistent_events=True,
        pipeline_def_for_backwards_compat=pipeline_def,
    )
    next(generator)
    generator.close()

    # Plan-execution context generator: advance once, then close.
    generator = PlanExecutionContextManager(  # pylint: disable=protected-access
        pipeline=InMemoryPipeline(pipeline_def),
        execution_plan=execution_plan,
        run_config={},
        pipeline_run=pipeline_run,
        instance=instance,
        retry_mode=RetryMode.DISABLED,
        scoped_resources_builder_cm=resource_initialization_manager,
    ).get_generator()
    next(generator)
    generator.close()
def execute_pipeline_with_steps(pipeline_def, step_keys_to_execute=None):
    """Execute *pipeline_def* on a throwaway ephemeral instance and return plan events.

    Args:
        pipeline_def: The pipeline definition to execute.
        step_keys_to_execute: Optional step-key subset; None executes everything.
    """
    plan = create_execution_plan(pipeline_def, step_keys_to_execute=step_keys_to_execute)
    with DagsterInstance.ephemeral() as instance:
        run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            step_keys_to_execute=step_keys_to_execute,
        )
        return execute_plan(plan, InMemoryPipeline(pipeline_def), instance, run)
def test_using_file_system_for_subplan():
    """Subset plans persist step outputs as pickles under the run's storage directory."""
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    # The output is pickled at <storage>/<run_id>/<step>/result.
    with open(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "return_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with open(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 2
def test_execute_plan_iterator():
    """Closing execute_plan_iterator mid-run still tears down resources."""
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        run_config = {"loggers": {"callback": {}}}

        execution_plan = create_execution_plan(pipeline, run_config=run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline,
            run_config={"loggers": {"callback": {}}},
            execution_plan=execution_plan,
        )

        iterator = execute_plan_iterator(
            execution_plan,
            InMemoryPipeline(pipeline),
            pipeline_run,
            instance,
            run_config=run_config,
        )

        # Drive the iterator until the first step starts, then abandon it.
        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        messages = [record.user_message for record in records if not record.is_dagster_event]
        # Resource cleanup must still have run after the early close.
        assert len([message for message in messages if message == "CLEANING A"]) > 0
        assert len([message for message in messages if message == "CLEANING B"]) > 0
def test_adls2_pipeline_with_custom_prefix(storage_account, file_system):
    """Intermediates written via ADLS2 storage land under the configured custom prefix."""
    adls2_prefix = "custom_prefix"

    pipe = define_inty_pipeline(should_throw=False)
    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential(),
                }
            }
        },
        "intermediate_storage": {
            "adls2": {
                "config": {"adls2_file_system": file_system, "adls2_prefix": adls2_prefix}
            }
        },
    }

    pipeline_run = PipelineRun(pipeline_name=pipe.name, run_config=run_config)
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe,
        run_config=run_config,
    )
    assert result.success

    # Rebuild a context against the same run to inspect the stored intermediates.
    execution_plan = create_execution_plan(pipe, run_config)
    with scoped_pipeline_context(
        execution_plan,
        InMemoryPipeline(pipe),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}
        ).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            run_id=result.run_id,
            file_system=file_system,
            prefix=adls2_prefix,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        # Storage root is <prefix>/storage/<run_id>.
        assert intermediate_storage.root == "/".join(["custom_prefix", "storage", result.run_id])
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("return_one")).obj
            == 1
        )
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("add_one")).obj
            == 2
        )
def test_execute_step_wrong_step_key():
    """Unknown step keys raise DagsterExecutionStepNotFoundError with singular/plural messages."""
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline,)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]

    # Singular message for a single unknown step.
    assert str(exc_info.value) == "Can not build subset plan from unknown step: nope.compute"

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline, environment_config
            ),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]

    # Plural message when several steps are unknown.
    assert (
        str(exc_info.value)
        == "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )
def test_execute_run_iterator():
    """Closing the run iterator mid-run records a pipeline failure and still cleans up resources.

    NOTE(review): shares its name with another variant of this test elsewhere;
    presumably they live in different modules — verify there is no shadowing.
    """
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"a": resource_a, "b": resource_b},
                logger_defs={"callback": construct_event_logger(event_callback)},
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={"loggers": {"callback": {}}},
        mode="default",
    )

    iterator = execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)

    # Drive the iterator until the first step starts, then abandon it.
    event_type = None
    while event_type != "STEP_START":
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [record.dagster_event for record in records if record.is_dagster_event]
    messages = [record.user_message for record in records if not record.is_dagster_event]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    # Resource teardown must still have run.
    assert len([message for message in messages if message == "CLEANING A"]) > 0
    assert len([message for message in messages if message == "CLEANING B"]) > 0
def test_configured_input_should_skip_step():
    """should_skip_step must agree with execute_pipeline when an input comes from config.

    ``solid_should_not_skip`` has one optional upstream input and one input
    supplied via run config — it must run in both code paths.
    """
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    run_config = {
        "solids": {"solid_should_not_skip": {"inputs": {"input_two": {"value": "2"}}}}
    }
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline", run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["one"],
            run_config=run_config,
        ),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )
    assert not should_skip_step(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["solid_should_not_skip"],
            run_config=run_config,
        ),
        instance,
        pipeline_run.run_id,
    )
def test_using_intermediates_to_override():
    """intermediate_storage config takes precedence over storage config.

    With in-memory intermediate storage configured alongside filesystem
    storage, nothing should be persisted to the filesystem.
    """
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )
    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    # In-memory intermediate storage won: nothing on the filesystem.
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))