def test_execution_plan_wrong_run_id():
    """Executing a plan that points at an unknown previous run raises DagsterRunNotFoundError."""
    pipeline_def = define_addy_pipeline()

    # Freshly generated id; this test never executes a run under it.
    unrun_id = str(uuid.uuid4())

    add_one_inputs = {'num': {'value': 3}}
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': add_one_inputs}}})

    reexecution_config = ReexecutionConfig(
        previous_run_id=unrun_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    run_config = RunConfig(reexecution_config=reexecution_config)

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=run_config
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(execution_plan, environment_dict=environment_dict, run_config=run_config)

    # The error must carry both a readable message and the offending run id.
    raised = exc_info.value
    expected_message = 'Run id {} set as previous run id was not found in run storage'.format(
        unrun_id
    )
    assert str(raised) == expected_message
    assert raised.invalid_run_id == unrun_id
def test_pipeline_step_key_subset_execution_wrong_step_key_in_subset():
    """Re-executing with an unknown key in step_keys_to_execute raises
    DagsterExecutionStepNotFoundError.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': add_one_inputs}}})

    # Initial run that the re-execution below refers back to.
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
    )
    assert result.success

    new_run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        reexecution_config = ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('add_one.compute')],
        )
        bad_subset_run_config = RunConfig(
            run_id=new_run_id,
            reexecution_config=reexecution_config,
            # 'nope' is the step key under test.
            step_keys_to_execute=['nope'],
        )
        execute_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            run_config=bad_subset_run_config,
        )
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing a plan step raises DagsterInvariantViolationError when the environment
    carries no 'storage' entry.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    # Unlike the env_with_fs-based tests, this environment only configures solid inputs.
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = {'solids': {'add_one': {'inputs': add_one_inputs}}}

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
    )
    assert result.success

    # Re-execute add_two against the run above.
    new_run_id = str(uuid.uuid4())
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    in_memory_run_config = RunConfig(run_id=new_run_id, reexecution_config=reexecution_config)

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=in_memory_run_config
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=in_memory_run_config,
            step_keys_to_execute=['add_two.compute'],
        )
# NOTE(review): another function with this exact name appears further down in this file.
# Within a single module the later definition replaces the earlier one, so pytest would
# collect only one of the two -- confirm which variant is meant to run.
def test_pipeline_step_key_subset_execution_wrong_output_name_in_step_output_handles():
    """Re-executing with a step output handle that names a nonexistent output raises
    DagsterStepOutputNotFoundError.
    """
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    instance = DagsterInstance.ephemeral()

    add_one_inputs = {'num': {'value': 3}}
    environment_dict = {'solids': {'add_one': {'inputs': add_one_inputs}}}

    # Initial run, recorded on the same instance that the re-execution uses.
    result = execute_pipeline(
        pipeline_def,
        environment_dict=env_with_fs(environment_dict),
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )
    assert result.success
    assert result.run_id == old_run_id

    new_run_id = str(uuid.uuid4())

    with pytest.raises(DagsterStepOutputNotFoundError):
        fs_environment = env_with_fs(environment_dict)
        # 'invalid_output' is the output name under test.
        bad_handle = StepOutputHandle('add_one.compute', 'invalid_output')
        reexecution_config = ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[bad_handle],
        )
        execute_pipeline(
            pipeline_def,
            environment_dict=fs_environment,
            run_config=RunConfig(
                run_id=new_run_id,
                reexecution_config=reexecution_config,
                step_keys_to_execute=['add_two.compute'],
            ),
            instance=instance,
        )
def test_retries():
    """A re-execution derived from a first run (in which fail_first_times raises) succeeds,
    yields 'okay perfect' downstream, and reports will_be_skipped as STEP_SKIPPED.
    """
    # Mutable counter shared with fail_first_times so that it raises on its first call only.
    fail = {'count': 0}

    @solid
    def fail_first_times(_, _start_fail):
        # Once the counter has been bumped, return normally.
        if fail['count'] >= 1:
            return 'okay perfect'

        fail['count'] += 1
        raise Exception('blah')

    @solid(
        output_defs=[
            OutputDefinition(bool, 'start_fail', is_optional=True),
            OutputDefinition(bool, 'start_skip', is_optional=True),
        ]
    )
    def two_outputs(_):
        yield Output(True, 'start_fail')
        # won't yield start_skip

    @solid
    def will_be_skipped(_, _start_skip):
        pass  # doesn't matter

    @solid
    def downstream_of_failed(_, input_str):
        return input_str

    @pipeline
    def pipe():
        start_fail, start_skip = two_outputs()
        downstream_of_failed(fail_first_times(start_fail))
        will_be_skipped(start_skip)

    env = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()

    # First run: raise_on_error=False so the exception from fail_first_times does not propagate.
    result = execute_pipeline(pipe, environment_dict=env, instance=instance, raise_on_error=False)

    retry_run_config = RunConfig(reexecution_config=ReexecutionConfig.from_previous_run(result))
    second_result = execute_pipeline(
        pipe,
        environment_dict=env,
        run_config=retry_run_config,
        instance=instance,
    )

    assert second_result.success

    downstream_value = second_result.result_for_solid('downstream_of_failed').output_value()
    assert downstream_value == 'okay perfect'

    skipped_events = [
        event for event in second_result.event_list if str(event.solid_handle) == 'will_be_skipped'
    ]
    first_skipped_event = skipped_events[0]
    assert str(first_skipped_event.event_type_value) == 'STEP_SKIPPED'
def test_execution_plan_wrong_invalid_step_key():
    """A step output handle naming a step that does not exist raises
    DagsterExecutionStepNotFoundError with a descriptive message.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': add_one_inputs}}})

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())
    # 'not_valid.compute' is the step key under test.
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('not_valid.compute')],
    )
    run_config = RunConfig(run_id=new_run_id, reexecution_config=reexecution_config)

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=run_config
    )

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.compute'],
        )

    expected_message = (
        'Step not_valid.compute was specified as a step from a previous run. '
        'It does not exist.'
    )
    assert str(exc_info.value) == expected_message
def to_reexecution_config(self):
    """Build a ReexecutionConfig from this object's previousRunId and stepOutputHandles.

    Each entry of ``self.stepOutputHandles`` is converted to a StepOutputHandle using its
    ``stepKey`` and ``outputName`` attributes.
    """
    # Imported inside the function rather than at module level.
    from dagster.core.execution.config import ReexecutionConfig
    from dagster.core.execution.plan.objects import StepOutputHandle

    previous_run_id = self.previousRunId
    handles = [
        StepOutputHandle(handle.stepKey, handle.outputName) for handle in self.stepOutputHandles
    ]
    return ReexecutionConfig(previous_run_id, handles)
def test_execution_plan_reexecution():
    """Re-executing only add_two.compute leaves both intermediates in the new run's store
    while emitting a step output event for add_two only.
    """
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': add_one_inputs}}})

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )
    assert result.success

    # Values expected in the store, checked in this order for both runs.
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    store = build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int).obj == expected_value

    # Re-execute add_two, taking add_one's output from the previous run.
    new_run_id = str(uuid.uuid4())
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    run_config = RunConfig(run_id=new_run_id, reexecution_config=reexecution_config)

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=run_config
    )

    step_events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        run_config=run_config,
        step_keys_to_execute=['add_two.compute'],
        instance=instance,
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, new_run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int).obj == expected_value

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
# NOTE(review): another function with this exact name appears further down in this file.
# Within a single module the later definition replaces the earlier one, so pytest would
# collect only one of the two -- confirm which variant is meant to run.
def test_pipeline_step_key_subset_execution():
    """Re-executing the pipeline restricted to add_two.compute succeeds, leaves both
    intermediates in the new run's store, and emits a step output event for add_two only.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = {'solids': {'add_one': {'inputs': add_one_inputs}}}

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )
    assert result.success

    # Values expected in the store, checked in this order for both runs.
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    store = FileSystemIntermediateStore(result.run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int) == expected_value

    # Re-execute add_two, taking add_one's output from the previous run.
    new_run_id = str(uuid.uuid4())
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    subset_run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=reexecution_config,
        storage_mode=RunStorageMode.FILESYSTEM,
        step_keys_to_execute=['add_two.compute'],
    )

    pipeline_reexecution_result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=subset_run_config,
    )
    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    store = FileSystemIntermediateStore(new_run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int) == expected_value

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
def test_execution_plan_wrong_invalid_output_name():
    """A step output handle naming an output the step does not have raises
    DagsterStepOutputNotFoundError carrying the step key and output name.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = {'solids': {'add_one': {'inputs': add_one_inputs}}}

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())
    # 'not_an_output' is the output name under test.
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute', 'not_an_output')],
    )
    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=reexecution_config,
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    # The plan is created without a run_config; only execute_plan receives it.
    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterStepOutputNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.compute'],
        )

    raised = exc_info.value
    expected_message = (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.compute does not have output not_an_output.'
    )
    assert str(raised) == expected_message
    assert raised.step_key == 'add_one.compute'
    assert raised.output_name == 'not_an_output'
# NOTE(review): an earlier function in this file has this exact name. Within a single
# module this later definition replaces the earlier one, so pytest would collect only one
# of the two -- confirm which variant is meant to run.
def test_pipeline_step_key_subset_execution():
    """Re-executing the pipeline restricted to add_two.compute succeeds, leaves both
    intermediates in the new run's store, and emits a step output event for add_two only.
    """
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': add_one_inputs}}})

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )
    assert result.success

    # Values expected in the store, checked in this order for both runs.
    expected_intermediates = (('add_one.compute', 4), ('add_two.compute', 6))

    store = FilesystemIntermediateStore.for_instance(instance, result.run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int).obj == expected_value

    # Re-execute add_two, taking add_one's output from the previous run.
    new_run_id = str(uuid.uuid4())
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.compute')],
    )
    subset_run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=reexecution_config,
        step_keys_to_execute=['add_two.compute'],
    )

    pipeline_reexecution_result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=subset_run_config,
        instance=instance,
    )
    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    store = FilesystemIntermediateStore.for_instance(instance, new_run_id)
    for step_key, expected_value in expected_intermediates:
        assert store.get_intermediate(None, step_key, Int).obj == expected_value

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
def test_reexecution():
    """Re-executing a two-solid pipeline with return_one's output taken from the previous
    run succeeds and reports results for both solids.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    def filesystem_environment():
        # Build a fresh environment dict for each execution.
        return {'storage': {'filesystem': {}}}

    add_one_dependencies = {'num': DependencyDefinition('return_one')}
    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': add_one_dependencies},
    )

    instance = DagsterInstance.ephemeral()

    pipeline_result = execute_pipeline(
        pipeline_def,
        environment_dict=filesystem_environment(),
        instance=instance,
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    reexecution_config = ReexecutionConfig(
        previous_run_id=pipeline_result.run_id,
        step_output_handles=[StepOutputHandle('return_one.compute')],
    )
    reexecution_run_config = RunConfig(reexecution_config=reexecution_config)

    reexecution_result = execute_pipeline(
        pipeline_def,
        environment_dict=filesystem_environment(),
        run_config=reexecution_run_config,
        instance=instance,
    )

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid('return_one').output_value() == 1
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
# NOTE(review): an earlier function in this file has this exact name. Within a single
# module this later definition replaces the earlier one, so pytest would collect only one
# of the two -- confirm which variant is meant to run.
def test_pipeline_step_key_subset_execution_wrong_output_name_in_step_output_handles():
    """Re-executing with a step output handle that names a nonexistent output raises
    DagsterStepOutputNotFoundError.
    """
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    add_one_inputs = {'num': {'value': 3}}
    environment_dict = {'solids': {'add_one': {'inputs': add_one_inputs}}}

    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )
    assert result.success

    new_run_id = str(uuid.uuid4())

    with pytest.raises(DagsterStepOutputNotFoundError):
        # 'invalid_output' is the output name under test.
        bad_handle = StepOutputHandle('add_one.compute', 'invalid_output')
        reexecution_config = ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[bad_handle],
        )
        execute_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=new_run_id,
                reexecution_config=reexecution_config,
                storage_mode=RunStorageMode.FILESYSTEM,
                step_keys_to_execute=['add_two.compute'],
            ),
        )