def test_reexecution_fs_storage():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(
        pipeline_def, environment_dict=environment_dict, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance
    )

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid('return_one').output_value() == 1
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
    reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
    assert reexecution_run.parent_run_id == pipeline_result.run_id
    assert reexecution_run.root_run_id == pipeline_result.run_id

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    grandchild_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance
    )

    assert grandchild_result.success
    assert len(grandchild_result.solid_result_list) == 2
    assert grandchild_result.result_for_solid('return_one').output_value() == 1
    assert grandchild_result.result_for_solid('add_one').output_value() == 2
    grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
    assert grandchild_run.parent_run_id == reexecution_result.run_id
    assert grandchild_run.root_run_id == pipeline_result.run_id
def test_in_memory_persist_one_run():
    with DagsterInstance.ephemeral() as instance:
        do_test_single_write_read(instance)
def test_create_app_with_workspace():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "./workspace.yaml")],
    ) as workspace:
        assert create_app_from_workspace(workspace, DagsterInstance.ephemeral())
def test_create_app_with_workspace():
    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [file_relative_path(__file__, "./workspace.yaml")],
    ) as workspace_process_context:
        assert create_app_from_workspace_process_context(workspace_process_context)
def test_successful_pipeline_reexecution(snapshot):
    def sanitize_result_data(result_data):
        if isinstance(result_data, dict):
            if 'path' in result_data:
                result_data['path'] = 'DUMMY_PATH'
            result_data = {k: sanitize_result_data(v) for k, v in result_data.items()}
        elif isinstance(result_data, list):
            for i in range(len(result_data)):
                result_data[i] = sanitize_result_data(result_data[i])
        return result_data

    run_id = str(uuid.uuid4())
    instance = DagsterInstance.ephemeral()
    result_one = execute_dagster_graphql(
        define_context(instance=instance),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {'runId': run_id},
                'mode': 'default',
            }
        },
    )

    assert (
        result_one.data['startPipelineExecution']['__typename'] == 'StartPipelineExecutionSuccess'
    )

    snapshot.assert_match(sanitize_result_data(result_one.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]'''
    )

    store = FilesystemIntermediateStore.for_instance(instance, run_id)
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (
        str(store.get_intermediate(None, 'sum_sq_solid.compute', PoorMansDataFrame).obj)
        == expected_value_repr
    )

    new_run_id = str(uuid.uuid4())

    result_two = execute_dagster_graphql(
        define_context(instance=instance),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {'name': 'csv_hello_world'},
                'environmentConfigData': csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {'runId': new_run_id},
                'mode': 'default',
            },
            'reexecutionConfig': {
                'previousRunId': run_id,
                'stepOutputHandles': [
                    {'stepKey': 'sum_solid.compute', 'outputName': 'result'}
                ],
            },
        },
    )

    query_result = result_two.data['startPipelineExecution']
    assert query_result['__typename'] == 'StartPipelineExecutionSuccess'
    logs = query_result['run']['logs']['nodes']

    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    assert not get_step_output_event(logs, 'sum_solid.compute')
    assert get_step_output_event(logs, 'sum_sq_solid.compute')

    snapshot.assert_match(sanitize_result_data(result_two.data))

    store = FilesystemIntermediateStore.for_instance(instance, new_run_id)
    assert not store.has_intermediate(None, 'sum_solid.inputs.num.read', 'input_thunk_output')
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (
        str(store.get_intermediate(None, 'sum_sq_solid.compute', PoorMansDataFrame).obj)
        == expected_value_repr
    )
def test_create_app_with_reconstructable_repo():
    recon_repo = ReconstructableRepository.from_yaml(
        file_relative_path(__file__, './repository.yaml')
    )
    assert create_app_with_reconstructable_repo(recon_repo, DagsterInstance.ephemeral())
def test_using_adls2_for_subplan(storage_account, file_system):
    pipeline_def = define_inty_pipeline()

    run_config = {
        "resources": {
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": get_azure_credential(),
                }
            }
        },
        "intermediate_storage": {"adls2": {"config": {"adls2_file_system": file_system}}},
    }

    run_id = make_new_run_id()

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        resource = context.scoped_resources_builder.build(
            required_resource_keys={"adls2"}
        ).adls2
        intermediate_storage = ADLS2IntermediateStorage(
            file_system=file_system,
            run_id=run_id,
            adls2_client=resource.adls2_client,
            blob_client=resource.blob_client,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediate_storage.has_intermediate(context, step_output_handle)
        assert intermediate_storage.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediate_storage.has_intermediate(context, step_output_handle)
        assert intermediate_storage.get_intermediate(context, Int, step_output_handle).obj == 2
def _check_execute_pipeline_args(
    fn_name, pipeline, environment_dict, mode, preset, tags, run_config, instance
):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'.format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, default=RunConfig())

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        check.invariant(
            run_config.mode is None or pipeline_preset.mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, run_config_mode=run_config.mode
            ),
        )

        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict) or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(preset=preset),
            )
            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            pipeline = pipeline.build_sub_pipeline(pipeline_preset.solid_subset)

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

    if run_config.mode is not None or run_config.tags:
        warnings.warn(
            (
                'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
                'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
                'instead.'
            ).format(fn_name=fn_name)
        )

    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    'You have attempted to execute pipeline {name} with mode {mode}. '
                    'Available modes: {modes}'
                ).format(name=pipeline_def.name, mode=mode, modes=pipeline_def.available_modes)
            )
    else:
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError(
                (
                    'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                    'attempted to execute it without specifying a mode. Set '
                    'mode property on the PipelineRun object.'
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(merge_dicts(pipeline_def.tags, run_config.tags or {}), tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )

    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
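# Hedged usage sketch (not from the source): a caller such as `execute_pipeline` would unpack
# the normalized tuple returned by the checker above. `my_pipeline` is a hypothetical
# PipelineDefinition; all optional arguments are passed as None and defaulted by the checker.
def _example_checker_usage(my_pipeline):
    pipeline, environment_dict, instance, mode, tags, run_config, execution_plan = (
        _check_execute_pipeline_args(
            'execute_pipeline',
            pipeline=my_pipeline,
            environment_dict=None,
            mode=None,
            preset=None,
            tags=None,
            run_config=None,
            instance=None,
        )
    )
    # With no mode or preset supplied, a single-mode pipeline falls back to its default mode
    # and `instance` to an ephemeral DagsterInstance.
    return pipeline, mode, instance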
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:
        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {'num': file_relative_path(__file__, '../data/num.csv')},
                    'outputs': [{'result': out_csv_path}],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {'name': 'csv_hello_world'},
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']

        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        run_id = make_new_run_id()
        instance.create_empty_run(run_id, 'csv_hello_world')

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {'name': 'csv_hello_world'},
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {'runId': run_id},
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one materialization event is emitted
                assert step_mat_event is None
                step_mat_event = message

        # ensure a materialization event was actually emitted
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][0]
        assert metadata_entry['path'] == out_csv_path
def _check_execute_pipeline_args(
    pipeline, run_config, mode, preset, tags, instance, solid_selection=None
):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    run_config = check.opt_dict_param(run_config, 'run_config')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'.format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `run_config` argument.'.format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None or solid_selection == pipeline_preset.solid_selection,
                'The solid_selection set in preset \'{preset}\', {preset_subset}, does not agree '
                'with the `solid_selection` argument: {solid_selection}'.format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    'You have attempted to execute pipeline {name} with mode {mode}. '
                    'Available modes: {modes}'
                ).format(name=pipeline_def.name, mode=mode, modes=pipeline_def.available_modes)
            )
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError(
                (
                    'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                    'attempted to execute it without specifying a mode. Set '
                    'mode property on the PipelineRun object.'
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        instance,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
def test_all_step_events():  # pylint: disable=too-many-locals
    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__)
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {
                        'repositoryLocationName': 'test_events',
                        'repositoryName': '<<unnamed>>',
                        'pipelineName': pipeline_def.name,
                    },
                    'runConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                },
            }
            res = execute_query(workspace, EXECUTE_PLAN_MUTATION, variables, instance=instance)

            # go through the same dict, decrementing all the event records we've seen in the
            # GraphQL response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types.
    # Why are these retry events not handled? Because right now there is no way to configure
    # retries on executePlan -- this needs to change, and we should separate the ExecutionParams
    # that get sent to executePlan from those that get sent to startPipelineExecution and friends.
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline(using_file_system=True)
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )
    assert result.success

    with open(
        os.path.join(instance.storage_directory(), result.run_id, "add_one", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 4

    with open(
        os.path.join(instance.storage_directory(), result.run_id, "add_two", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 6

    ## re-execute add_two
    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), resolved_run_config)
    subset_plan = execution_plan.build_subset_plan(["add_two"], pipeline_def, resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=subset_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        subset_plan,
        InMemoryPipeline(pipeline_def),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    assert not os.path.exists(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result")
    )
    with open(
        os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_two", "result"),
        "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 6
    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")
def _check_execute_pipeline_args(
    pipeline, run_config, mode, preset, tags, instance, solid_selection=None
):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset}).".format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree "
                "with the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    "You have attempted to execute pipeline {name} with mode {mode}. "
                    "Available modes: {modes}"
                ).format(name=pipeline_def.name, mode=mode, modes=pipeline_def.available_modes)
            )
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError(
                (
                    "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                    "attempted to execute it without specifying a mode. Set "
                    "mode property on the PipelineRun object."
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, "instance", DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        instance,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
def test_reexecution_fs_storage_with_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, environment_dict, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result_no_subset = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run, instance
    )
    assert reexecution_result_no_subset.success
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid('return_one').output_value() == 1

    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_selection=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid('return_one').output_value() == 1

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )

    reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run, instance
    )
    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )

    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )

    re_reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), re_reexecution_pipeline_run, instance
    )
    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_QUERY, variables, instance=instance
            )

            # go through the same dict, decrementing all the event records we've seen in the
            # GraphQL response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(run_config.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_reexecution_fs_storage_with_solid_selection():
    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()

    # Case 1: re-execute a part of a pipeline when the original pipeline doesn't have solid
    # selection
    pipeline_result = execute_pipeline(pipeline_def, run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result_no_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        step_selection=["return_one"],
        instance=instance,
    )
    assert reexecution_result_no_solid_selection.success
    assert len(reexecution_result_no_solid_selection.solid_result_list) == 2
    assert reexecution_result_no_solid_selection.result_for_solid("add_one").skipped
    assert reexecution_result_no_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 2: re-execute a pipeline when the original pipeline has solid selection
    pipeline_result_solid_selection = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
        solid_selection=["return_one"],
    )
    assert pipeline_result_solid_selection.success
    assert len(pipeline_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert pipeline_result_solid_selection.result_for_solid("return_one").output_value() == 1

    reexecution_result_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
    )
    assert reexecution_result_solid_selection.success
    assert len(reexecution_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert reexecution_result_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 3: partially re-execute a pipeline when the original pipeline has solid selection,
    # re-executing a step which wasn't included in the original pipeline
    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Step selection refers to unknown step: add_one",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=pipeline_result_solid_selection.run_id,
            run_config=run_config,
            step_selection=["add_one"],
            instance=instance,
        )

    # Case 4: partially re-execute a pipeline when the original pipeline has solid selection,
    # re-executing a step which was included in the original pipeline
    re_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=reexecution_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one"],
    )
    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid("return_one").output_value() == 1
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    ## re-execute add_two
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_selection=["add_two"],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Can not build subset plan from unknown step: nope",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_selection=["nope"],
            instance=instance,
        )
def define_context(repo_fn, instance=None):
    return DagsterGraphQLInProcessRepositoryContext(
        handle=ExecutionTargetHandle.for_repo_fn(repo_fn),
        instance=instance or DagsterInstance.ephemeral(),
        execution_manager=SynchronousExecutionManager(),
    )
def test_create_app():
    handle = ExecutionTargetHandle.for_repo_yaml(file_relative_path(__file__, './repository.yaml'))
    assert create_app(handle, DagsterInstance.ephemeral())
def define_subprocess_context_for_file(python_file, fn_name, instance=None):
    return DagsterGraphQLInProcessRepositoryContext(
        handle=ExecutionTargetHandle.for_repo_python_file(python_file, fn_name),
        instance=instance or DagsterInstance.ephemeral(),
        execution_manager=SubprocessExecutionManager(instance),
    )
def execute_pipeline(pipeline, environment_dict=None, run_config=None, instance=None):
    '''"Synchronous" version of :py:func:`execute_pipeline_iterator`.

    This is the entry point for dagster CLI and dagit execution. For the dagster-graphql entry
    point, see execute_plan() below.

    Parameters:
        pipeline (PipelineDefinition): Pipeline to run
        environment_dict (dict): The environment configuration that parameterizes this run
        run_config (RunConfig): Configuration for how this pipeline will be executed
        instance (DagsterInstance): The instance to execute against, defaults to ephemeral
            (no artifacts persisted)

    Returns:
        :py:class:`PipelineExecutionResult`
    '''
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, pipeline)
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline, environment_dict, run_config)

    # run should be used and threaded through here
    # https://github.com/dagster-io/dagster/issues/1745
    _run = _create_run(instance, pipeline, run_config, environment_dict)

    with scoped_pipeline_context(
        pipeline, environment_dict, run_config, instance
    ) as pipeline_context:
        event_list = list(
            _pipeline_execution_iterator(
                pipeline_context,
                execution_plan=execution_plan,
                run_config=run_config,
                step_keys_to_execute=run_config.step_keys_to_execute,
            )
        )

        return PipelineExecutionResult(
            pipeline,
            run_config.run_id,
            event_list,
            lambda: scoped_pipeline_context(
                pipeline,
                environment_dict,
                run_config,
                instance,
                system_storage_data=SystemStorageData(
                    intermediates_manager=pipeline_context.intermediates_manager,
                    file_manager=pipeline_context.file_manager,
                ),
            ),
        )
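# Minimal usage sketch for the legacy `environment_dict` API above (not from the source),
# assuming the same-era `@lambda_solid`/`PipelineDefinition` constructs seen elsewhere in this
# file. `emit_one` is a hypothetical solid defined only for illustration.
from dagster import PipelineDefinition, lambda_solid

@lambda_solid
def emit_one():
    return 1

result = execute_pipeline(
    PipelineDefinition(solid_defs=[emit_one]),
    environment_dict={'storage': {'filesystem': {}}},
)
assert result.success
assert result.result_for_solid('emit_one').output_value() == 1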
def command(**kwargs):
    capture_result['external_repo'] = get_external_repository_from_kwargs(
        kwargs, DagsterInstance.ephemeral()
    )
def get_context(self, solid_config=None, mode_def=None, environment_dict=None):
    '''Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
            use to construct the context. Specify this if you would like a context constructed
            with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
            with a console logger will be constructed.
        environment_dict (Optional[dict]): The environment config dict with which to construct
            the context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    '''
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted, return
    # that context rather than overwriting it.
    if self.context is not None and isinstance(self.context, DagstermillRuntimeExecutionContext):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={'dagstermill': colored_console_logger})
        environment_dict['loggers'] = {'dagstermill': {}}

    solid_def = SolidDefinition(
        name='this_solid',
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description='Ephemeral solid constructed by dagstermill.get_context()',
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name='ephemeral_dagstermill_pipeline'
    )

    run_id = make_new_run_id()

    # construct stubbed PipelineRun for notebook exploration...
    # The actual pipeline run during pipeline execution will be serialized and reconstituted
    # in the `reconstitute_pipeline_context` call
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode_def.name,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline = pipeline_def

    execution_plan = create_execution_plan(self.pipeline, environment_dict, mode=mode_def.name)
    with scoped_pipeline_context(
        execution_plan,
        environment_dict,
        pipeline_run,
        DagsterInstance.ephemeral(),
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan, pipeline_context.system_storage_def
            ),
        )

    return self.context
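# Hedged usage sketch (not from the source): in a notebook, this method is typically reached
# through the module-level manager, i.e. `dagstermill.get_context()`. The `solid_config` value
# here is hypothetical, chosen only to show the pass-through onto the context.
import dagstermill

context = dagstermill.get_context(solid_config={'date': '2019-01-01'})
context.log.info('solid_config: {}'.format(context.solid_config))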
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle,
        composite_pipeline_with_config_mapping,
        pipeline_run,
        instance,
        raise_on_error=False,
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
def define_context_for_repository_yaml(path, instance=None):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_yaml(path),
        instance=instance or DagsterInstance.ephemeral(),
        execution_manager=SynchronousExecutionManager(),
    )
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    ## re-execute add_two
    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
        step_keys_to_execute=["add_two"],
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one.compute")).obj
        == 4
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two.compute")).obj
        == 6
    )

    ## re-execute add_two
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_selection=["add_two.compute"],
        instance=instance,
    )

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one.compute")).obj
        == 4
    )
    assert (
        intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two.compute")).obj
        == 6
    )

    assert not get_step_output_event(step_events, "add_one.compute")
    assert get_step_output_event(step_events, "add_two.compute")

    with pytest.raises(
        DagsterInvalidSubsetError,
        match="No qualified steps to execute found for step_selection",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_selection=["nope.compute"],
            instance=instance,
        )
def execute_pipeline(
    pipeline, environment_dict=None, run_config=None, instance=None, raise_on_error=True
):
    '''Execute a pipeline synchronously.

    Users will typically call this API when testing pipeline execution, or running standalone
    scripts.

    Parameters:
        pipeline (PipelineDefinition): The pipeline to execute.
        environment_dict (Optional[dict]): The environment configuration that parameterizes this
            run, as a dict.
        run_config (Optional[RunConfig]): Optionally specifies additional config options for
            pipeline execution.
        instance (Optional[DagsterInstance]): The instance to execute against. If this is
            ``None``, an ephemeral instance will be used, and no artifacts will be persisted
            from the run.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``, since this is the most useful behavior in test.

    Returns:
        :py:class:`PipelineExecutionResult`: The result of pipeline execution.

    For the asynchronous version, see :py:func:`execute_pipeline_iterator`.

    This is the entrypoint for dagster CLI execution. For the dagster-graphql entrypoint, see
    ``dagster.core.execution.api.execute_plan()``.
    '''
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, pipeline)
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline, environment_dict, run_config)

    pipeline_run = _create_run(instance, pipeline, run_config, environment_dict)

    with scoped_pipeline_context(
        pipeline, environment_dict, pipeline_run, instance, raise_on_error=raise_on_error
    ) as pipeline_context:
        event_list = list(
            _pipeline_execution_iterator(pipeline_context, execution_plan, pipeline_run)
        )

        return PipelineExecutionResult(
            pipeline,
            run_config.run_id,
            event_list,
            lambda: scoped_pipeline_context(
                pipeline,
                environment_dict,
                pipeline_run,
                instance,
                system_storage_data=SystemStorageData(
                    intermediates_manager=pipeline_context.intermediates_manager,
                    file_manager=pipeline_context.file_manager,
                ),
            ),
        )
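# Hedged usage sketch for the `raise_on_error` flag documented above (not from the source):
# with raise_on_error=False, a user-code exception is captured in the event stream and surfaced
# as a failed result rather than raised. `always_fails` is a hypothetical solid.
from dagster import PipelineDefinition, lambda_solid

@lambda_solid
def always_fails():
    raise Exception('boom')

result = execute_pipeline(
    PipelineDefinition(solid_defs=[always_fails]),
    raise_on_error=False,
)
assert not result.success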
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}}}

    run_id = make_new_run_id()

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan.build_subset_plan(['return_one.compute']),
    ) as context:
        store = S3IntermediateStore(
            s3_bucket,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={'s3'},
            ).s3,
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan.build_subset_plan(['add_one.compute']),
    ) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_using_gcs_for_subplan(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"intermediate_storage": {"gcs": {"config": {"gcs_bucket": gcs_bucket}}}}

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, resolved_run_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        intermediate_storage = GCSIntermediateStorage(
            gcs_bucket,
            run_id,
            client=context.scoped_resources_builder.build(
                required_resource_keys={"gcs"},
            ).gcs,
        )
        assert intermediate_storage.has_intermediate(context, StepOutputHandle("return_one"))
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("return_one")).obj
            == 1
        )

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, resolved_run_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        assert intermediate_storage.has_intermediate(context, StepOutputHandle("add_one"))
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("add_one")).obj
            == 2
        )