def test_execution_plan_wrong_run_id():
    pipeline_def = define_addy_pipeline()

    unrun_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                storage_mode=RunStorageMode.FILESYSTEM,
                reexecution_config=ReexecutionConfig(
                    previous_run_id=unrun_id,
                    step_output_handles=[StepOutputHandle('add_one.transform')],
                ),
            ),
        )

    assert str(exc_info.value) == 'Run id {} set as previous run id was not found in run storage'.format(
        unrun_id
    )

    assert exc_info.value.invalid_run_id == unrun_id
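# These tests rely on a define_addy_pipeline() fixture that is not part of this excerpt.
# The sketch below is a hypothetical reconstruction, inferred only from the assertions in
# the surrounding tests (an input of 3 becomes 4 after add_one and 6 after add_two); the
# solid signatures, pipeline name, and use of lambda_solid are assumptions, not the actual
# fixture. Names such as InputDefinition, OutputDefinition, DependencyDefinition, and Int
# are assumed to be imported from dagster.
def define_addy_pipeline():
    @lambda_solid(inputs=[InputDefinition('num', Int)], output=OutputDefinition(Int))
    def add_one(num):
        # 3 -> 4, matching get_filesystem_intermediate(..., 'add_one.transform', Int) == 4
        return num + 1

    @lambda_solid(inputs=[InputDefinition('num', Int)], output=OutputDefinition(Int))
    def add_two(num):
        # 4 -> 6, matching get_filesystem_intermediate(..., 'add_two.transform', Int) == 6
        return num + 2

    return PipelineDefinition(
        name='addy_pipeline',
        solids=[add_one, add_two],
        dependencies={'add_two': {'num': DependencyDefinition('add_one')}},
    )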
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.IN_MEMORY, run_id=old_run_id),
    )

    assert result.success

    ## re-execute add_two
    new_run_id = str(uuid.uuid4())

    in_memory_run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('add_one.transform')],
        ),
        storage_mode=RunStorageMode.IN_MEMORY,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=in_memory_run_config,
            step_keys_to_execute=['add_two.transform'],
        )
def test_using_s3_for_subplan(s3_bucket):
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    step_keys = ['return_one.transform']

    run_id = str(uuid.uuid4())

    try:
        return_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=step_keys,
            )
        )

        assert get_step_output(return_one_step_events, 'return_one.transform')
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            assert get_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform', Int) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=['add_one.transform'],
            )
        )

        assert get_step_output(add_one_step_events, 'add_one.transform')
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
            assert get_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform', Int) == 2
    finally:
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            rm_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            rm_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
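# define_inty_pipeline() and get_step_output() are also fixtures defined outside this
# excerpt. Minimal sketches consistent with what the tests assert ('return_one.transform'
# produces 1, 'add_one.transform' produces 2, and get_step_output returns the successful
# output event for a step key) might look like the following; the exact definitions,
# names, and signatures are assumptions.
def define_inty_pipeline():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid(inputs=[InputDefinition('num', Int)], output=OutputDefinition(Int))
    def add_one(num):
        return num + 1

    return PipelineDefinition(
        name='inty_pipeline',
        solids=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )


def get_step_output(step_events, step_key):
    # Assumed behavior: scan the emitted step events for the successful output of the
    # given step key, using the step_key / is_successful_output attributes that the
    # event-based tests below also rely on.
    for step_event in step_events:
        if step_event.step_key == step_key and step_event.is_successful_output:
            return step_event
    return None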
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()

    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(execution_plan, step_keys_to_execute=['nope'])

    assert exc_info.value.step_key == 'nope'
    assert str(exc_info.value) == 'Execution plan does not contain step "nope"'
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['nope'],
        )
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.transform'],
        )
def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    step_keys = ['return_one.transform']

    run_id = str(uuid.uuid4())

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.transform')
    assert has_filesystem_intermediate(run_id, 'return_one.transform')
    assert get_filesystem_intermediate(run_id, 'return_one.transform', Int) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
            ),
            step_keys_to_execute=['add_one.transform'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.transform')
    assert has_filesystem_intermediate(run_id, 'add_one.transform')
    assert get_filesystem_intermediate(run_id, 'add_one.transform', Int) == 2
def test_execution_plan_source_step():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(
        pipeline_def,
        subset_info=ExecutionPlanSubsetInfo.only_subset(
            included_step_keys=['return_one.transform']
        ),
    )
    step_events = execute_plan(execution_plan)

    assert len(step_events) == 1
    assert step_events[0].success_data.value == 1
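# define_two_int_pipeline() is another fixture missing from this excerpt. Given the
# 'return_one.transform' / 'add_one.transform' step keys and the values 1, 2, and 3
# asserted around it (including the injected input {'num': 2} producing 3 below), it is
# presumably a two-solid pipeline along these lines; the names and signatures are
# assumptions, not the actual fixture.
def define_two_int_pipeline():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid(inputs=[InputDefinition('num', Int)], output=OutputDefinition(Int))
    def add_one(num):
        return num + 1

    return PipelineDefinition(
        name='two_int_pipeline',
        solids=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )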
def test_execution_plan_wrong_invalid_output_name():
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())

    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('add_one.transform', 'not_an_output')],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterStepOutputNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.transform'],
        )

    assert str(exc_info.value) == (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.transform does not have output not_an_output.'
    )

    assert exc_info.value.step_key == 'add_one.transform'
    assert exc_info.value.output_name == 'not_an_output'
def test_execution_plan_middle_step():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)
    step_results = execute_plan(
        pipeline_def,
        execution_plan,
        subset_info=ExecutionPlanSubsetInfo(
            ['add_one.transform'], {'add_one.transform': {'num': 2}}
        ),
    )

    assert len(step_results) == 2
    assert step_results[1].success_data.value == 3
def test_execution_plan_wrong_invalid_step_key():
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())

    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('not_valid.transform')],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.transform'],
        )

    assert str(exc_info.value) == (
        'Step not_valid.transform was specified as a step from a previous run. '
        'It does not exist.'
    )
def test_reentrant_execute_plan():
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag('foo')
        assert context.get_tag('foo') == 'bar'
        called['yup'] = True

    pipeline_def = PipelineDefinition(name='has_tag_pipeline', solids=[has_tag])
    execution_plan = create_execution_plan(pipeline_def)
    step_events = execute_plan(
        execution_plan, execution_metadata=ExecutionMetadata(tags={'foo': 'bar'})
    )

    assert called['yup']
    assert len(step_events) == 1
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )

    assert result.success
    assert get_filesystem_intermediate(result.run_id, 'add_one.transform', Int) == 4
    assert get_filesystem_intermediate(result.run_id, 'add_two.transform', Int) == 6

    ## re-execute add_two
    new_run_id = str(uuid.uuid4())

    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('add_one.transform')],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    step_events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        run_config=run_config,
        step_keys_to_execute=['add_two.transform'],
    )

    assert get_filesystem_intermediate(new_run_id, 'add_one.transform', Int) == 4
    assert get_filesystem_intermediate(new_run_id, 'add_two.transform', Int) == 6

    assert not get_step_output_event(step_events, 'add_one.transform')
    assert get_step_output_event(step_events, 'add_two.transform')
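# get_step_output_event(...), has_filesystem_intermediate(...), and
# get_filesystem_intermediate(...) are helpers defined elsewhere in the test module.
# get_step_output_event presumably returns the successful output event for a step key
# (None if that step did not run), roughly as sketched below; the filesystem-intermediate
# helpers read persisted values back out of the run's filesystem storage and are not
# reconstructed here.
def get_step_output_event(step_events, step_key):
    # Assumed behavior, analogous to get_step_output above.
    for step_event in step_events:
        if step_event.step_key == step_key and step_event.is_successful_output:
            return step_event
    return None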
def test_reentrant_execute_plan():
    called = {}

    @solid
    def has_context_value(info):
        assert info.context.has_context_value('foo')
        assert info.context.get_context_value('foo') == 'bar'
        called['yup'] = True

    pipeline_def = PipelineDefinition(
        name='has_context_value_pipeline', solids=[has_context_value]
    )
    execution_plan = create_execution_plan(pipeline_def)
    step_results = execute_plan(
        pipeline_def, execution_plan, reentrant_info=ReentrantInfo(context_stack={'foo': 'bar'})
    )

    assert called['yup']
    assert len(step_results) == 1
def test_reentrant_execute_plan():
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag('foo')
        assert context.get_tag('foo') == 'bar'
        called['yup'] = True

    pipeline_def = PipelineDefinition(name='has_tag_pipeline', solids=[has_tag])
    execution_plan = create_execution_plan(pipeline_def)
    step_events = execute_plan(execution_plan, run_config=RunConfig(tags={'foo': 'bar'}))

    assert called['yup']
    assert len(step_events) == 3
    assert step_events[0].tags['foo'] == 'bar'
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key('return_one.transform')
    assert execution_plan.get_step_by_key('add_one.transform')

    step_results = execute_plan(pipeline_def, execution_plan)

    assert len(step_results) == 2
    assert step_results[0].step.key == 'return_one.transform'
    assert step_results[0].success
    assert step_results[0].success_data.value == 1
    assert step_results[1].step.key == 'add_one.transform'
    assert step_results[1].success
    assert step_results[1].success_data.value == 2
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key('return_one.transform')
    assert execution_plan.get_step_by_key('add_one.transform')

    step_events = execute_plan(execution_plan)

    # start, out, success, start, out, success
    assert len(step_events) == 6

    assert step_events[1].step_key == 'return_one.transform'
    assert step_events[1].is_successful_output
    assert step_events[1].step_output_data.value_repr == '1'
    assert step_events[4].step_key == 'add_one.transform'
    assert step_events[4].is_successful_output
    assert step_events[4].step_output_data.value_repr == '2'
def test_execution_plan_two_outputs():
    @solid(
        outputs=[
            OutputDefinition(types.Int, 'num_one'),
            OutputDefinition(types.Int, 'num_two'),
        ]
    )
    def return_one_two(_context):
        yield Result(1, 'num_one')
        yield Result(2, 'num_two')

    pipeline_def = PipelineDefinition(name='return_one_two_pipeline', solids=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    step_events = execute_plan(execution_plan)

    assert step_events[1].step_key == 'return_one_two.transform'
    assert step_events[1].step_output_data.value_repr == '1'
    assert step_events[1].step_output_data.output_name == 'num_one'

    assert step_events[2].step_key == 'return_one_two.transform'
    assert step_events[2].step_output_data.value_repr == '2'
    assert step_events[2].step_output_data.output_name == 'num_two'
def test_execution_plan_two_outputs():
    @solid(
        outputs=[
            OutputDefinition(types.Int, 'num_one'),
            OutputDefinition(types.Int, 'num_two'),
        ]
    )
    def return_one_two(_info):
        yield Result(1, 'num_one')
        yield Result(2, 'num_two')

    pipeline_def = PipelineDefinition(name='return_one_two_pipeline', solids=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    step_results = execute_plan(pipeline_def, execution_plan)

    # FIXME: we should change this to be *single* result with two outputs
    assert step_results[0].step.key == 'return_one_two.transform'
    assert step_results[0].success_data.value == 1
    assert step_results[0].success_data.output_name == 'num_one'

    assert step_results[1].step.key == 'return_one_two.transform'
    assert step_results[1].success_data.value == 2
    assert step_results[1].success_data.output_name == 'num_two'
def _execute_plan_chain_actual_execute_or_error(
    execute_plan_args, dauphin_pipeline, _evaluate_env_config_result
):
    graphql_execution_metadata = execute_plan_args.execution_metadata
    run_id = graphql_execution_metadata.get('runId')
    tags = tags_from_graphql_execution_metadata(graphql_execution_metadata)

    execution_plan = create_execution_plan(
        pipeline=dauphin_pipeline.get_dagster_pipeline(),
        environment_dict=execute_plan_args.environment_dict,
    )

    if execute_plan_args.step_keys:
        for step_key in execute_plan_args.step_keys:
            if not execution_plan.has_step(step_key):
                return execute_plan_args.graphene_info.schema.type_named('InvalidStepError')(
                    invalid_step_key=step_key
                )

    run_config = RunConfig(run_id=run_id, tags=tags)

    step_events = list(
        execute_plan(
            execution_plan=execution_plan,
            environment_dict=execute_plan_args.environment_dict,
            run_config=run_config,
            step_keys_to_execute=execute_plan_args.step_keys,
        )
    )

    return execute_plan_args.graphene_info.schema.type_named('ExecutePlanSuccess')(
        pipeline=dauphin_pipeline,
        has_failures=any(
            se for se in step_events if se.event_type == DagsterEventType.STEP_FAILURE
        ),
        step_events=list(
            map(lambda se: _create_dauphin_step_event(execution_plan, se), step_events)
        ),
    )
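# tags_from_graphql_execution_metadata(...) and _create_dauphin_step_event(...) are
# defined elsewhere in the GraphQL layer and are not part of this excerpt. The former
# presumably flattens the list of {key, value} tag entries from the GraphQL execution
# metadata into the plain dict that RunConfig(tags=...) expects; the sketch below is an
# assumption about its shape, not the actual implementation.
def tags_from_graphql_execution_metadata(graphql_execution_metadata):
    tags = {}
    if 'tags' in graphql_execution_metadata:
        for tag in graphql_execution_metadata['tags']:
            tags[tag['key']] = tag['value']
    return tags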