def yield_empty_pipeline_context(run_id=None, instance=None):
    """Yield a pipeline context built around an empty pipeline.

    Wraps ``PipelineDefinition([])`` in an ``InMemoryExecutablePipeline``,
    creates a run named ``'<empty>'`` on the instance (including pipeline and
    execution-plan snapshots) and yields the context produced by
    ``scoped_pipeline_context``.

    Args:
        run_id: Optional run id forwarded to ``instance.create_run``.
        instance: Optional ``DagsterInstance``. When ``None`` an ephemeral
            instance is created.

    Yields:
        The context object produced by ``scoped_pipeline_context``.
    """
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))
    pipeline_def = pipeline.get_definition()

    # Validate the argument first and only build the ephemeral fallback when
    # it is actually needed; passing it as ``default=`` would construct a
    # throwaway instance on every call, even when the caller supplied one.
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    if instance is None:
        instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(pipeline)

    pipeline_run = instance.create_run(
        pipeline_name='<empty>',
        run_id=run_id,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
    with scoped_pipeline_context(execution_plan, {}, pipeline_run, instance) as context:
        yield context
def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    """Build an ``ExecutionPlan`` for a pipeline.

    Args:
        pipeline: An ``ExecutablePipeline``, or (for backwards compatibility)
            a bare ``PipelineDefinition`` which is wrapped in an
            ``InMemoryExecutablePipeline``.
        environment_dict: Optional config dict with string keys.
        mode: Optional mode name; defaults to the definition's default mode.
        step_keys_to_execute: Optional list of step key strings.

    Returns:
        The result of ``ExecutionPlan.build`` for the given arguments.
    """
    # backcompat
    executable = (
        InMemoryExecutablePipeline(pipeline)
        if isinstance(pipeline, PipelineDefinition)
        else pipeline
    )
    check.inst_param(executable, 'pipeline', ExecutablePipeline)
    definition = executable.get_definition()

    config = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    resolved_mode = check.opt_str_param(
        mode, 'mode', default=definition.get_default_mode_name()
    )
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    env_config = EnvironmentConfig.build(definition, config, mode=resolved_mode)
    return ExecutionPlan.build(
        executable,
        env_config,
        mode=resolved_mode,
        step_keys_to_execute=step_keys_to_execute,
    )
def test_subset_for_execution():
    """Subsetting ``foo_pipeline`` with ``'*add_nums'`` selects the expected solids and runs."""
    subset = InMemoryExecutablePipeline(foo_pipeline).subset_for_execution(['*add_nums'])

    # The selection is kept verbatim; the resolved solid names are exposed as a set.
    assert subset.solid_selection == ['*add_nums']
    expected_solids = {'add_nums', 'return_one', 'return_two'}
    assert subset.solids_to_execute == expected_solids

    assert execute_pipeline(subset).success
def _check_pipeline(pipeline):
    """Normalize ``pipeline`` to an ``ExecutablePipeline`` and fetch its definition.

    Args:
        pipeline: An ``ExecutablePipeline`` or, for backwards compatibility,
            a ``PipelineDefinition`` (wrapped in ``InMemoryExecutablePipeline``).

    Returns:
        A ``(pipeline, pipeline_def)`` tuple.
    """
    # backcompat
    executable = (
        InMemoryExecutablePipeline(pipeline)
        if isinstance(pipeline, PipelineDefinition)
        else pipeline
    )
    check.inst_param(executable, 'pipeline', ExecutablePipeline)
    return executable, executable.get_definition()
def test_subset_for_execution():
    """Subsetting ``foo_pipeline`` with ``"*add_nums"`` selects the expected solids and runs."""
    subset = InMemoryExecutablePipeline(foo_pipeline).subset_for_execution(["*add_nums"])

    # The selection is kept verbatim; the resolved solid names are exposed as a set.
    assert subset.solid_selection == ["*add_nums"]
    expected_solids = {"add_nums", "return_one", "return_two"}
    assert subset.solids_to_execute == expected_solids

    assert execute_pipeline(subset).success
def test_compile():
    """Coalescing the execution plan of ``composition`` yields the expected key set."""
    solid_config = {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}}
    env_config = EnvironmentConfig.build(composition, solid_config)

    coalesced = coalesce_execution_steps(
        ExecutionPlan.build(InMemoryExecutablePipeline(composition), env_config)
    )

    expected_keys = {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
    assert set(coalesced.keys()) == expected_keys
def _check_pipeline(pipeline):
    """Normalize ``pipeline`` to an ``ExecutablePipeline``.

    Args:
        pipeline: An ``ExecutablePipeline`` or, for backwards compatibility,
            a ``PipelineDefinition`` (wrapped in ``InMemoryExecutablePipeline``).

    Returns:
        The (possibly wrapped) executable pipeline.
    """
    # backcompat
    executable = (
        InMemoryExecutablePipeline(pipeline)
        if isinstance(pipeline, PipelineDefinition)
        else pipeline
    )
    check.inst_param(executable, "pipeline", ExecutablePipeline)
    return executable
def test_compile():
    """Coalescing the execution plan of ``composition`` yields the expected key set."""
    solid_config = {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}}
    env_config = EnvironmentConfig.build(composition, solid_config)

    coalesced = coalesce_execution_steps(
        ExecutionPlan.build(InMemoryExecutablePipeline(composition), env_config)
    )

    expected_keys = {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
    assert set(coalesced.keys()) == expected_keys
def synthesize_events(solids_fn, run_id=None):
    """Run a throwaway pipeline around ``solids_fn`` and collect what its logger callback receives.

    Args:
        solids_fn: Callable invoked inside the pipeline body to compose solids.
        run_id: Optional run id forwarded to ``create_run_for_pipeline``.

    Returns:
        A ``(events, result)`` tuple: the list filled by the callback handed
        to ``mode_def`` and the result of ``execute_run``.
    """
    captured = []

    def _append_event(event):
        captured.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    logger_config = {'loggers': {'callback': {}, 'console': {}}}

    temp_instance = DagsterInstance.local_temp()
    run = temp_instance.create_run_for_pipeline(
        a_pipe, run_id=run_id, environment_dict=logger_config
    )

    outcome = execute_run(InMemoryExecutablePipeline(a_pipe), run, temp_instance)
    assert outcome.success

    return captured, outcome
def test_compile():
    """Coalescing the execution plan of ``composition`` yields the expected key set."""
    # TODO: remove dependency on legacy_examples
    # https://github.com/dagster-io/dagster/issues/2653
    solid_config = {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}}
    env_config = EnvironmentConfig.build(composition, solid_config)

    coalesced = coalesce_execution_steps(
        ExecutionPlan.build(InMemoryExecutablePipeline(composition), env_config)
    )

    expected_keys = {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
    assert set(coalesced.keys()) == expected_keys
def yield_empty_pipeline_context(run_id=None, instance=None):
    """Yield a pipeline context built around an empty pipeline.

    Wraps ``PipelineDefinition([])`` in an ``InMemoryExecutablePipeline``,
    creates a run named ``'<empty>'`` on the instance (without snapshots) and
    yields the context produced by ``scoped_pipeline_context``.

    Args:
        run_id: Optional run id forwarded to ``instance.create_run``.
        instance: Optional ``DagsterInstance``. When ``None`` an ephemeral
            instance is created.

    Yields:
        The context object produced by ``scoped_pipeline_context``.
    """
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))

    # Validate the argument first and only build the ephemeral fallback when
    # it is actually needed; passing it as ``default=`` would construct a
    # throwaway instance on every call, even when the caller supplied one.
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    if instance is None:
        instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run(
        pipeline_name='<empty>',
        run_id=run_id,
        environment_dict=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
        parent_pipeline_snapshot=None,
    )
    with scoped_pipeline_context(
        create_execution_plan(pipeline), {}, pipeline_run, instance,
    ) as context:
        yield context
def test_single_step_resource_event_logs():
    """The resource's log message is attributed to the single executed step."""
    # Test to attribute logs for single-step plans which are often the representation of
    # sub-plans in a multiprocessing execution environment. Most likely will need to be rewritten
    # with the refactor detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = 'I AM A SOLID'
    USER_RESOURCE_MESSAGE = 'I AM A RESOURCE'
    captured = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return 'A'

    logging_mode = ModeDefinition(
        resource_defs={'a': resource_a},
        logger_defs={'callback': construct_event_logger(event_callback)},
    )
    pipeline_def = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[logging_mode],
    )

    temp_instance = DagsterInstance.local_temp()
    run = temp_instance.create_run_for_pipeline(
        pipeline_def,
        run_config={'loggers': {'callback': {}}},
        step_keys_to_execute=['resource_solid.compute'],
    )

    outcome = execute_run(InMemoryExecutablePipeline(pipeline_def), run, temp_instance)
    assert outcome.success

    # Only plain log records are of interest: one from the solid, one from the resource.
    log_records = [rec for rec in captured if isinstance(rec, LogMessageRecord)]
    assert len(log_records) == 2

    resource_record = next(
        rec for rec in log_records if rec.user_message == USER_RESOURCE_MESSAGE
    )
    assert resource_record.step_key == 'resource_solid.compute'
def run_one(self, instance):
    """Pop the oldest queued run, execute it, and return all of its events.

    Args:
        instance: The instance handed to ``execute_run_iterator``.

    Returns:
        A list with every event produced by ``execute_run_iterator``.
    """
    assert len(self._queue) > 0
    next_run = self._queue.pop(0)

    # The definition is looked up by the name recorded on the run.
    definition = define_repository().get_pipeline(next_run.pipeline_name)
    executable = InMemoryExecutablePipeline(definition)
    return list(execute_run_iterator(executable, next_run, instance))
def test_execute_run_iterator():
    """Closing the run iterator after the first step start reports failure and logs both cleanups."""
    captured = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured.append(record)

    temp_instance = DagsterInstance.local_temp()

    callback_mode = ModeDefinition(
        resource_defs={"a": resource_a, "b": resource_b},
        logger_defs={"callback": construct_event_logger(event_callback)},
    )
    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[callback_mode],
    )
    run = temp_instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={"loggers": {"callback": {}}},
        mode="default",
    )

    run_events = execute_run_iterator(
        InMemoryExecutablePipeline(pipeline_def), run, instance=temp_instance
    )

    # Advance up to and including the first STEP_START event, then abandon the run.
    while next(run_events).event_type_value != "STEP_START":
        pass
    run_events.close()

    dagster_events = [rec.dagster_event for rec in captured if rec.is_dagster_event]
    user_messages = [rec.user_message for rec in captured if not rec.is_dagster_event]

    assert any(evt.is_pipeline_failure for evt in dagster_events)
    assert any(msg == "CLEANING A" for msg in user_messages)
    assert any(msg == "CLEANING B" for msg in user_messages)
def test_execute_run_iterator():
    """Closing the run iterator after the first step start reports failure and logs both cleanups."""
    captured = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured.append(record)

    temp_instance = DagsterInstance.local_temp()

    callback_mode = ModeDefinition(
        resource_defs={'a': resource_a, 'b': resource_b},
        logger_defs={'callback': construct_event_logger(event_callback)},
    )
    pipeline_def = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[callback_mode],
    )
    run = temp_instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        environment_dict={'loggers': {'callback': {}}},
        mode='default',
    )

    run_events = execute_run_iterator(
        InMemoryExecutablePipeline(pipeline_def), run, instance=temp_instance
    )

    # Advance up to and including the first STEP_START event, then abandon the run.
    while next(run_events).event_type_value != 'STEP_START':
        pass
    run_events.close()

    dagster_events = [rec.dagster_event for rec in captured if rec.is_dagster_event]
    user_messages = [rec.user_message for rec in captured if not rec.is_dagster_event]

    assert any(evt.is_pipeline_failure for evt in dagster_events)
    assert any(msg == 'CLEANING A' for msg in user_messages)
    assert any(msg == 'CLEANING B' for msg in user_messages)
def yield_empty_pipeline_context(run_id=None, instance=None):
    """Yield a pipeline context built around an empty pipeline.

    Wraps ``PipelineDefinition([])`` in an ``InMemoryExecutablePipeline``,
    creates a run named ``'<empty>'`` on the instance and yields the context
    produced by ``scoped_pipeline_context``.

    Args:
        run_id: Optional run id forwarded to ``instance.create_run``.
        instance: Optional ``DagsterInstance``. When ``None`` an ephemeral
            instance is created.

    Yields:
        The context object produced by ``scoped_pipeline_context``.
    """
    pipeline = InMemoryExecutablePipeline(PipelineDefinition([]))

    # Validate the argument first and only build the ephemeral fallback when
    # it is actually needed; passing it as ``default=`` would construct a
    # throwaway instance on every call, even when the caller supplied one.
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    if instance is None:
        instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run(
        run_id=run_id, pipeline_name='<empty>', pipeline_snapshot=None
    )
    with scoped_pipeline_context(
        create_execution_plan(pipeline), {}, pipeline_run, instance,
    ) as context:
        yield context
def execute_pipeline(self, _, pipeline_def, pipeline_run, instance):
    """Execute a run synchronously and return its result.

    The run id is recorded in ``self._active`` for the duration of the
    execution and is removed again afterwards, even if execution raises.

    Args:
        _: Unused positional argument.
        pipeline_def: The ``PipelineDefinition`` to execute.
        pipeline_run: The ``PipelineRun`` describing the run.
        instance: The ``DagsterInstance`` to execute against.

    Returns:
        A ``PipelineExecutionResult`` holding every event that
        ``execute_run_iterator`` produced.
    """
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)

    run_id = pipeline_run.run_id
    self._active.add(run_id)
    try:
        event_list = list(
            execute_run_iterator(
                InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance
            )
        )
    finally:
        # Without this, an exception during execution would leave the run
        # marked as active forever.
        self._active.remove(run_id)

    return PipelineExecutionResult(pipeline_def, run_id, event_list, lambda: None)
def test_execution_plan_subset_strict_resources_within_composite():
    """Executing only the step that consumes resource ``b`` records only ``b`` as initialized."""
    initialized = {}
    composite_pipeline = create_composite_solid_pipeline(initialized)

    ephemeral_instance = DagsterInstance.ephemeral()
    run = ephemeral_instance.create_run_for_pipeline(
        composite_pipeline,
        step_keys_to_execute=['wraps_b.consumes_resource_b.compute'],
    )

    outcome = execute_run(
        InMemoryExecutablePipeline(composite_pipeline), run, ephemeral_instance
    )

    assert outcome.success
    assert set(initialized.keys()) == {'b'}
def test_run_group():
    """The run-group GraphQL query returns the same four-run group for the root and a child run."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')

        # One root run followed by three runs that name it as parent and root.
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
            )
            execute_run(InMemoryExecutablePipeline(foo_pipeline), run, instance)
            runs.append(run)

        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)

        result_one = execute_dagster_graphql(
            context_at_time_1, RUN_GROUP_QUERY, variables={'runId': root_run_id},
        )
        assert result_one.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_one.data['runGroupOrError']['runs']) == 4

        result_two = execute_dagster_graphql(
            context_at_time_1, RUN_GROUP_QUERY, variables={'runId': runs[-1].run_id},
        )
        # Check the second response's type; the original re-asserted
        # result_one here, leaving result_two's type unverified.
        assert result_two.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_two.data['runGroupOrError']['runs']) == 4

        # Querying by root id or by child id must describe the same group.
        assert (
            result_one.data['runGroupOrError']['rootRunId']
            == result_two.data['runGroupOrError']['rootRunId']
        )
        assert (
            result_one.data['runGroupOrError']['runs']
            == result_two.data['runGroupOrError']['runs']
        )
def test_execution_plan_subset_with_aliases():
    """Executing only the aliased step that consumes resource ``b`` records only ``b`` as initialized."""
    initialized = {}

    @resource
    def resource_a(_):
        initialized['a'] = True
        yield 'A'

    @resource
    def resource_b(_):
        initialized['b'] = True
        yield 'B'

    @solid(required_resource_keys={'a'})
    def consumes_resource_a(context):
        assert context.resources.a == 'A'

    @solid(required_resource_keys={'b'})
    def consumes_resource_b(context):
        assert context.resources.b == 'B'

    both_resources_mode = ModeDefinition(
        resource_defs={
            'a': resource_a,
            'b': resource_b,
        }
    )

    @pipeline(mode_defs=[both_resources_mode])
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias('b_alias')()

    ephemeral_instance = DagsterInstance.ephemeral()
    run = ephemeral_instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        step_keys_to_execute=['b_alias.compute'],
    )

    outcome = execute_run(
        InMemoryExecutablePipeline(selective_init_test_pipeline_with_alias),
        run,
        ephemeral_instance,
    )

    assert outcome.success
    assert set(initialized.keys()) == {'b'}
def test_execution_plan_subset_strict_resources():
    """Executing only the step that consumes resource ``b`` records only ``b`` as initialized."""
    initialized = {}
    ephemeral_instance = DagsterInstance.ephemeral()
    resource_pipeline = get_resource_init_pipeline(initialized)

    run = ephemeral_instance.create_run_for_pipeline(
        resource_pipeline,
        step_keys_to_execute=["consumes_resource_b.compute"],
    )

    outcome = execute_run(
        InMemoryExecutablePipeline(resource_pipeline), run, ephemeral_instance
    )

    assert outcome.success
    assert set(initialized.keys()) == {"b"}
def test_single_step_reexecution():
    """Re-executing only ``add_one.compute`` yields its value while ``return_one`` has no output."""

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()

    pipeline_result = execute_pipeline(pipeline_def, environment_dict, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run, instance
    )

    assert reexecution_result.success
    # Identity comparison is the correct check for None (PEP 8); ``== None``
    # can be satisfied by any object with a permissive ``__eq__``.
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
def test_two_step_reexecution():
    """Re-executing both ``add_one`` steps yields the final value while ``return_one`` has no output."""

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    environment_dict = {'storage': {'filesystem': {}}}

    pipeline_result = execute_pipeline(
        two_step_reexec, environment_dict=environment_dict, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        two_step_reexec,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(two_step_reexec),
        reexecution_pipeline_run,
        instance=instance,
    )

    assert reexecution_result.success
    # Identity comparison is the correct check for None (PEP 8); ``== None``
    # can be satisfied by any object with a permissive ``__eq__``.
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
def in_mp_process(cls, handle, pipeline_run, instance_ref, term_event):
    """Load and execute the pipeline for ``pipeline_run`` inside the current process.

    Engine events are reported on the instance when the process starts, when
    the pipeline cannot be loaded, when an unexpected exception escapes
    execution, and (always, once loading succeeded) when execution ends.

    Args:
        handle: Object used to build the repository and pipeline definitions.
        pipeline_run: The run to execute; supplies run id, pipeline name and
            the solid subset selector.
        instance_ref: Reference from which the ``DagsterInstance`` is rebuilt.
        term_event: Passed to ``start_termination_thread``.

    Returns:
        A ``PipelineExecutionResult`` on success; ``None`` when loading the
        pipeline fails or when execution raises.
    """
    pipeline_name = pipeline_run.pipeline_name
    run_id = pipeline_run.run_id

    instance = DagsterInstance.from_ref(instance_ref)
    process_id = os.getpid()

    def _report_framework_error():
        # Must be called from inside an ``except`` block: it reads the
        # exception currently being handled via sys.exc_info().
        instance.report_engine_event(
            'An exception was thrown during execution that is likely a framework error, '
            'rather than an error in user code.',
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls,
        )

    instance.report_engine_event(
        'Started process for pipeline (pid: {pid}).'.format(pid=process_id),
        pipeline_run,
        EngineEventData.in_process(process_id, marker_end='dagit_subprocess_init'),
        cls,
    )

    start_termination_thread(term_event)

    try:
        handle.build_repository_definition()
        pipeline_def = handle.with_pipeline_name(pipeline_name).build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        instance.report_engine_event(
            'Failed attempting to load pipeline "{}"'.format(pipeline_name),
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls,
        )
        return

    try:
        sub_pipeline = pipeline_def.build_sub_pipeline(pipeline_run.selector.solid_subset)
        event_list = list(
            execute_run_iterator(
                InMemoryExecutablePipeline(sub_pipeline), pipeline_run, instance,
            )
        )
        return PipelineExecutionResult(pipeline_def, run_id, event_list, lambda: None)

    # Add a DagsterEvent for unexpected exceptions
    # Explicitly ignore KeyboardInterrupts since they are used for termination
    except DagsterSubprocessError as err:
        has_real_error = any(
            err_info.cls_name != 'KeyboardInterrupt'
            for err_info in err.subprocess_error_infos
        )
        if has_real_error:
            _report_framework_error()
    except Exception:  # pylint: disable=broad-except
        _report_framework_error()
    finally:
        instance.report_engine_event(
            'Process for pipeline exited (pid: {pid}).'.format(pid=process_id),
            pipeline_run,
            cls=cls,
        )
def test_reexecution_fs_storage():
    """Full re-executions (child and grandchild) reproduce results and keep parent/root run ids."""

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()

    def _assert_both_solids_ran(run_result):
        # Every full (re-)execution must produce both solids with the same values.
        assert run_result.success
        assert len(run_result.solid_result_list) == 2
        assert run_result.result_for_solid('return_one').output_value() == 1
        assert run_result.result_for_solid('add_one').output_value() == 2

    # Original run.
    pipeline_result = execute_pipeline(
        pipeline_def, environment_dict={'storage': {'filesystem': {}}}, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # Child run: parent and root are both the original run.
    child_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), child_run, instance
    )
    _assert_both_solids_ran(reexecution_result)

    stored_child = instance.get_run_by_id(reexecution_result.run_id)
    assert stored_child.parent_run_id == pipeline_result.run_id
    assert stored_child.root_run_id == pipeline_result.run_id

    # Grandchild run: parent is the child, root is still the original run.
    grandchild_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    grandchild_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), grandchild_run, instance
    )
    _assert_both_solids_ran(grandchild_result)

    stored_grandchild = instance.get_run_by_id(grandchild_result.run_id)
    assert stored_grandchild.parent_run_id == reexecution_result.run_id
    assert stored_grandchild.root_run_id == pipeline_result.run_id
def test_reexecution_fs_storage_with_subset():
    """Re-execution with step-key and solid subsets runs only the selected solid.

    Also checks that requesting a step outside the selected solid subset is
    rejected when the run is created.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()

    pipeline_result = execute_pipeline(pipeline_def, environment_dict, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result_no_subset = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run, instance
    )
    assert reexecution_result_no_subset.success
    # Step-key selection alone keeps both solids in the result; add_one is skipped.
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid('return_one').output_value() == 1

    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_selection=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid('return_one').output_value() == 1

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), reexecution_pipeline_run, instance
    )
    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    # Check the re-execution result itself; the original re-asserted
    # pipeline_result_subset here, which was already verified above.
    with pytest.raises(DagsterInvariantViolationError):
        reexecution_result.result_for_solid('add_one')
    assert reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )

    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solids_to_execute={'return_one'},
        step_keys_to_execute=['return_one.compute'],
    )
    re_reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), re_reexecution_pipeline_run, instance
    )
    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solids_to_execute={'return_one'},
            step_keys_to_execute=['add_one.compute'],
        )
def _check_execute_pipeline_args(fn_name, pipeline, environment_dict, mode, preset, tags,
                                 run_config, instance):
    """Validate and reconcile the arguments of a pipeline-execution entry point.

    Resolves the effective mode, environment dict and tags from the explicit
    arguments, the named preset (if any) and the legacy ``run_config``, checks
    that they do not contradict each other, and builds the execution plan.

    Args:
        fn_name: Name of the calling function; used in the deprecation warning.
        pipeline: An ``ExecutablePipeline`` or, for backwards compatibility,
            a ``PipelineDefinition``.
        environment_dict: Optional environment config dict.
        mode: Optional mode name; mutually exclusive with ``preset``.
        preset: Optional preset name looked up on the pipeline definition.
        tags: Optional dict of tags with string keys.
        run_config: Optional ``RunConfig``; a default one is created if omitted.
        instance: Optional ``DagsterInstance``; an ephemeral one is used if falsy.

    Returns:
        The tuple ``(pipeline, environment_dict, instance, mode, tags,
        run_config, execution_plan)``.

    Raises:
        DagsterInvariantViolationError: If the resolved mode is not defined on
            the pipeline, or no mode was given for a multi-mode pipeline.
    """
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        mode is None or preset is None,
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'.format(
            mode=mode, preset=preset
        ),
    )
    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, default=RunConfig())

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)
        preset_mode = pipeline_preset.mode
        preset_env = pipeline_preset.environment_dict

        # The preset's mode must agree with any mode given via run_config.
        check.invariant(
            run_config.mode is None or preset_mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset, preset_mode=preset_mode, run_config_mode=run_config.mode
            ),
        )

        if preset_env is not None:
            # An explicitly passed environment may only repeat the preset's.
            check.invariant(
                (not environment_dict) or (preset_env == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(preset=preset),
            )
            environment_dict = preset_env

        if pipeline_preset.solid_subset is not None:
            pipeline = pipeline.build_sub_pipeline(pipeline_preset.solid_subset)

        check.invariant(
            mode is None or mode == preset_mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset, preset_mode=preset_mode, mode=mode),
        )
        mode = preset_mode

    if run_config.mode is not None or run_config.tags:
        deprecation_template = (
            'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
            'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
            'instead.'
        )
        warnings.warn(deprecation_template.format(fn_name=fn_name))

    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode

    if mode is None:
        # No mode anywhere: only acceptable when the pipeline has exactly one.
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError(
                (
                    'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                    'attempted to execute it without specifying a mode. Set '
                    'mode property on the PipelineRun object.'
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()
    elif not pipeline_def.has_mode_definition(mode):
        raise DagsterInvariantViolationError(
            (
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}'
            ).format(
                name=pipeline_def.name,
                mode=mode,
                modes=pipeline_def.available_modes,
            )
        )

    # Later sources win: pipeline tags, then run_config tags, then explicit tags.
    base_tags = merge_dicts(pipeline_def.tags, run_config.tags or {})
    tags = merge_dicts(base_tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )

    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
def test_pipeline_step_key_subset_execution():
    """Re-executing only ``add_two.compute`` emits output for that step alone.

    Also checks that creating a run for an unknown step key is rejected.
    """
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_dict = env_with_fs(
        {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    )
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    result = execute_pipeline(
        pipeline_def, environment_dict=environment_dict, instance=instance
    )
    assert result.success

    store = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    )
    for step_key, expected in expected_intermediates:
        assert store.get_intermediate(None, Int, StepOutputHandle(step_key)).obj == expected

    ## re-execute add_two
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_two.compute'],
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )
    pipeline_reexecution_result = execute_run(
        InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance
    )
    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    # NOTE(review): this store is opened with the ORIGINAL run's id, so the
    # checks below re-read the first run's intermediates - confirm whether
    # the re-execution's run id was intended.
    store = IntermediateStoreIntermediatesManager(
        build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    )
    for step_key, expected in expected_intermediates:
        assert store.get_intermediate(None, Int, StepOutputHandle(step_key)).obj == expected

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(
        DagsterExecutionStepNotFoundError, match='Execution plan does not contain step'
    ):
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            step_keys_to_execute=['nope.compute'],
            parent_run_id=result.run_id,
            root_run_id=result.run_id,
        )