def test_create_execution_plan_with_bad_inputs():
    """Expect plan creation to reject the 'add_three' input config used here.

    The environment supplies ``{'num': 3}`` under ``solids.add_three.inputs``;
    the test passes only when ``create_execution_plan`` raises
    ``PipelineConfigEvaluationError`` for it.
    """
    rejected_environment = {'solids': {'add_three': {'inputs': {'num': 3}}}}

    with pytest.raises(PipelineConfigEvaluationError):
        create_execution_plan(define_diamond_pipeline(), rejected_environment)
def test_basic_int_execution_plan():
    """Check step count and topological order for one JSON output config.

    A single ``json`` output is configured for the ``result`` output of
    ``return_one``; the plan must contain three steps in the order listed in
    ``expected_keys``.
    """
    json_output = {'result': {'json': {'path': 'dummy.json'}}}
    environment = {'solids': {'return_one': {'outputs': [json_output]}}}

    execution_plan = create_execution_plan(single_int_output_pipeline(), environment)

    assert len(execution_plan.steps) == 3

    expected_keys = (
        'return_one.transform',
        'return_one.materialization.output.result.0',
        'return_one.materialization.output.result.join',
    )
    steps = execution_plan.topological_steps()
    for position, expected_key in enumerate(expected_keys):
        assert steps[position].key == expected_key
def test_execution_plan_wrong_run_id():
    """Re-executing against a freshly generated previous run id must fail.

    ``execute_plan`` is expected to raise ``DagsterRunNotFoundError`` whose
    message and ``invalid_run_id`` attribute both carry the generated id.
    """
    pipeline_def = define_addy_pipeline()
    unrun_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    reexecution_config = ReexecutionConfig(
        previous_run_id=unrun_id,
        step_output_handles=[StepOutputHandle('add_one.transform')],
    )
    run_config = RunConfig(
        storage_mode=RunStorageMode.FILESYSTEM,
        reexecution_config=reexecution_config,
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
        )

    # Inspect the captured exception only after the ``raises`` block closes.
    raised = exc_info.value
    expected_message = 'Run id {} set as previous run id was not found in run storage'.format(
        unrun_id
    )
    assert str(raised) == expected_message
    assert exc_info.value.invalid_run_id == unrun_id
def test_running():
    """Execute the passing pipeline through the multiprocessing manager.

    After joining the manager the run must report ``SUCCESS``, have logged
    events, and contain exactly one ``PIPELINE_PROCESS_START`` and exactly one
    ``PIPELINE_PROCESS_STARTED`` event.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_passing_pipeline',
        module_name=None,
    )
    repository_container = RepositoryContainer(target_info)
    pipeline = define_passing_pipeline()

    csv_source = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_source}}}}

    selector = ExecutionSelector('pandas_hello_world')
    execution_plan = create_execution_plan(pipeline, env_config)
    pipeline_run = InMemoryPipelineRun(run_id, selector, env_config, execution_plan)

    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container, pipeline, pipeline_run)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, EventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, EventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
def test_create_subplan_middle_step():
    """Build a subplan for 'add_one.transform' with an injected 'num' input.

    The subplan must hold two steps: a value step for ``num`` that has no
    inputs, followed by the transform step that consumes its output.

    NOTE(review): this file defines another function with this same name; if
    both live in one module, the later definition shadows the earlier one.
    """
    pipeline_def = define_two_int_pipeline()
    typed_environment = create_typed_environment(pipeline_def, None)
    execution_plan = create_execution_plan(pipeline_def)

    value_step_key = 'add_one.transform.input.num.value'
    transform_step_key = 'add_one.transform'

    with yield_context(pipeline_def, typed_environment, ExecutionMetadata()) as context:
        plan_info = ExecutionPlanInfo(
            context=context, pipeline=pipeline_def, environment=typed_environment
        )
        builder_state = StepBuilderState(pipeline_name=pipeline_def.name)
        subset_info = ExecutionPlanSubsetInfo(
            ['add_one.transform'], {'add_one.transform': {'num': 2}}
        )
        subplan = create_subplan(plan_info, builder_state, execution_plan, subset_info)

        assert subplan

        steps = subplan.topological_steps()
        assert len(steps) == 2

        # First step: the injected input value.
        value_step = steps[0]
        assert value_step.key == value_step_key
        assert not value_step.step_inputs
        assert len(value_step.step_outputs) == 1

        # Second step: the transform wired to the injected value.
        transform_step = steps[1]
        assert transform_step.key == transform_step_key
        assert len(transform_step.step_inputs) == 1
        upstream_handle = transform_step.step_inputs[0].prev_output_handle
        assert upstream_handle.step.key == value_step_key
        assert upstream_handle.output_name == VALUE_OUTPUT
        assert len(transform_step.step_outputs) == 1

        assert len(subplan.topological_steps()) == 2
        ordered_keys = [step.key for step in subplan.topological_steps()]
        assert ordered_keys == [
            'add_one.transform.input.num.value',
            'add_one.transform',
        ]
def test_failing():
    """Execute the failing pipeline through the multiprocessing manager.

    After joining the manager the run must report ``FAILURE`` and must have
    produced at least one log entry.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_failing_pipeline',
        module_name=None,
    )
    repository_container = RepositoryContainer(target_info)
    pipeline = define_failing_pipeline()

    csv_source = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_source}}}}

    selector = ExecutionSelector('pandas_hello_world')
    execution_plan = create_execution_plan(pipeline, env_config)
    pipeline_run = InMemoryPipelineRun(run_id, selector, env_config, execution_plan)

    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container, pipeline, pipeline_run)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
def test_external_execution_input_marshal_code_error():
    """Marshal the 'num' input of 'add_one.transform' from the key 'nope'.

    With ``throw_on_user_error=True`` an ``IOError`` must propagate. With
    ``throw_on_user_error=False`` a single failed ``UNMARSHAL_INPUT`` step
    result must be returned whose user exception is an ``IOError``.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    def run_with_unreadable_input(throw_on_user_error):
        # Arguments are rebuilt on every call so the two runs share no state.
        return execute_externalized_plan(
            pipeline,
            execution_plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError):
        run_with_unreadable_input(True)

    results = run_with_unreadable_input(False)

    assert len(results) == 1
    marshal_result = results[0]
    assert marshal_result.success is False
    assert marshal_result.step.kind == StepKind.UNMARSHAL_INPUT
    assert isinstance(marshal_result.failure_data.dagster_error.user_exception, IOError)
def test_external_execution_output_code_error():
    """Marshal the 'result' output of 'add_one.transform' to a non-string path.

    The path given is the integer ``23434``; ``DagsterMarshalOutputError`` is
    expected, carrying the output name and step key checked below.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    outputs_to_marshal = {'add_one.transform': [{'output': 'result', 'path': 23434}]}

    with pytest.raises(DagsterMarshalOutputError) as exc_info:
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
        )

    expected_message = 'Error during the marshalling of output result in step add_one.transform'
    assert str(exc_info.value) == expected_message
    assert exc_info.value.output_name == 'result'
    assert exc_info.value.step_key == 'add_one.transform'
def test_basic_int_multiple_serializations_execution_plan():
    """Check the plan built when one output is materialized to two JSON files.

    Four steps are expected: the transform first, the two materialize steps
    at positions 1 and 2 (checked via ``assert_plan_topological_level``), and
    the join step last.
    """
    first_output = {'result': {'json': {'path': 'dummy_one.json'}}}
    second_output = {'result': {'json': {'path': 'dummy_two.json'}}}
    environment = {'solids': {'return_one': {'outputs': [first_output, second_output]}}}

    execution_plan = create_execution_plan(single_int_output_pipeline(), environment)

    assert len(execution_plan.steps) == 4

    steps = execution_plan.topological_steps()
    assert steps[0].key == 'return_one.transform'

    materialize_keys = [
        'return_one.outputs.result.materialize.0',
        'return_one.outputs.result.materialize.1',
    ]
    assert_plan_topological_level(steps, [1, 2], materialize_keys)

    assert steps[3].key == 'return_one.outputs.result.materialize.join'
def test_create_subplan_middle_step():
    """Create a plan subset for 'add_one.transform' with an injected 'num'.

    The subset is requested through ``ExecutionPlanSubsetInfo.with_input_values``
    and must hold two steps: a value step for ``num`` with no inputs, followed
    by the transform step that consumes its output.

    NOTE(review): this file defines another function with this same name; if
    both live in one module, the later definition shadows the earlier one.
    """
    value_step_key = 'add_one.transform.input.num.value'
    transform_step_key = 'add_one.transform'

    pipeline_def = define_two_int_pipeline()
    subset_info = ExecutionPlanSubsetInfo.with_input_values(
        ['add_one.transform'], {'add_one.transform': {'num': 2}}
    )
    subplan = create_execution_plan(pipeline_def, subset_info=subset_info)

    assert subplan

    steps = subplan.topological_steps()
    assert len(steps) == 2

    # First step: the injected input value.
    value_step = steps[0]
    assert value_step.key == value_step_key
    assert not value_step.step_inputs
    assert len(value_step.step_outputs) == 1

    # Second step: the transform wired to the injected value.
    transform_step = steps[1]
    assert transform_step.key == transform_step_key
    assert len(transform_step.step_inputs) == 1
    upstream_handle = transform_step.step_inputs[0].prev_output_handle
    assert upstream_handle.step.key == value_step_key
    assert upstream_handle.output_name == VALUE_OUTPUT
    assert len(transform_step.step_outputs) == 1

    assert len(subplan.topological_steps()) == 2
    ordered_keys = [step.key for step in subplan.topological_steps()]
    assert ordered_keys == [
        'add_one.transform.input.num.value',
        'add_one.transform',
    ]
def test_execution_plan_reexecution_with_in_memory():
    """Re-execution against an IN_MEMORY previous run must be rejected.

    The pipeline is first executed successfully with in-memory storage. A
    second run then tries to execute only 'add_two.transform' while reusing
    the 'add_one.transform' output from the first run, and
    ``DagsterInvariantViolationError`` is expected.
    """
    pipeline_def = define_addy_pipeline()
    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    first_run_config = RunConfig(storage_mode=RunStorageMode.IN_MEMORY, run_id=old_run_id)
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=first_run_config,
    )
    assert result.success

    # Re-execute add_two, pointing at the output of the first run.
    new_run_id = str(uuid.uuid4())
    reexecution_config = ReexecutionConfig(
        previous_run_id=result.run_id,
        step_output_handles=[StepOutputHandle('add_one.transform')],
    )
    in_memory_run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=reexecution_config,
        storage_mode=RunStorageMode.IN_MEMORY,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=in_memory_run_config,
            step_keys_to_execute=['add_two.transform'],
        )
def test_execution_crash():
    """Execute the crashy pipeline through the multiprocessing manager.

    After joining the manager the run must report ``FAILURE`` and its final
    log message must say the execution process for this run id exited
    unexpectedly.
    """
    run_id = 'run-1'
    target_info = RepositoryTargetInfo(
        repository_yaml=None,
        python_file=__file__,
        fn_name='define_crashy_pipeline',
        module_name=None,
    )
    repository_container = RepositoryContainer(target_info)
    pipeline = define_crashy_pipeline()

    csv_source = {'csv': {'path': script_relative_path('num.csv')}}
    env_config = {'solids': {'sum_solid': {'inputs': {'num': csv_source}}}}

    selector = ExecutionSelector('pandas_hello_world')
    execution_plan = create_execution_plan(pipeline, env_config)
    pipeline_run = InMemoryPipelineRun(run_id, selector, env_config, execution_plan)

    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container, pipeline, pipeline_run)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    crash_template = 'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'
    assert last_log.message == crash_template.format(run_id=run_id)
def test_using_s3_for_subplan(s3_bucket):
    """Run two steps one at a time with S3 storage and check the intermediates.

    'return_one.transform' and then 'add_one.transform' are executed as
    separate ``execute_plan`` calls sharing one run id. After each call the
    step's intermediate must exist in ``s3_bucket`` with the expected value
    (1, then 2). Both intermediates are removed in ``finally`` whether or not
    the assertions pass.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    step_keys = ['return_one.transform']
    run_id = str(uuid.uuid4())

    def run_steps(keys):
        # A fresh RunConfig is built per call; every call shares ``run_id``.
        return list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=keys,
            )
        )

    def open_context():
        return yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        )

    try:
        return_one_step_events = run_steps(step_keys)
        assert get_step_output(return_one_step_events, 'return_one.transform')

        with open_context() as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            assert (
                get_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform', Int)
                == 1
            )

        add_one_step_events = run_steps(['add_one.transform'])
        assert get_step_output(add_one_step_events, 'add_one.transform')

        with open_context() as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
            assert (
                get_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform', Int)
                == 2
            )
    finally:
        # Always remove what the test may have written to the bucket.
        with open_context() as context:
            rm_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            rm_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
def test_basic_int_and_string_execution_plan():
    """Check the plan for a solid with two outputs, each materialized to JSON.

    Five steps are expected: the transform, one materialize step per output
    (positions 1-2), and one join step per output (positions 3-4). The test
    also checks that the 'string' materialize step is fed by the transform's
    'string' output, and that its join step is fed by the materialize step's
    ``MATERIALIZATION_THUNK_OUTPUT``.
    """
    transform_key = 'return_one_and_foo.transform'
    string_materialize_key = 'return_one_and_foo.outputs.string.materialize.0'
    string_join_key = 'return_one_and_foo.outputs.string.materialize.join'

    pipeline = multiple_output_pipeline()
    environment = {
        'solids': {
            'return_one_and_foo': {
                'outputs': [
                    {'string': {'json': {'path': 'dummy_string.json'}}},
                    {'number': {'json': {'path': 'dummy_number.json'}}},
                ]
            }
        }
    }
    execution_plan = create_execution_plan(pipeline, environment)

    assert len(execution_plan.steps) == 5

    steps = execution_plan.topological_steps()
    assert steps[0].key == transform_key

    assert_plan_topological_level(
        steps,
        [1, 2],
        [
            'return_one_and_foo.outputs.string.materialize.0',
            'return_one_and_foo.outputs.number.materialize.0',
        ],
    )
    assert_plan_topological_level(
        steps,
        [3, 4],
        [
            'return_one_and_foo.outputs.string.materialize.join',
            'return_one_and_foo.outputs.number.materialize.join',
        ],
    )

    transform_step = execution_plan.get_step_by_key(transform_key)

    # The materialize step consumes the transform's 'string' output.
    string_mat_step = execution_plan.get_step_by_key(string_materialize_key)
    assert len(string_mat_step.step_inputs) == 1
    materialize_source = string_mat_step.step_inputs[0].prev_output_handle
    assert materialize_source == StepOutputHandle.from_step(
        step=transform_step, output_name='string'
    )

    # The join step consumes the materialize step's thunk output.
    string_mat_join_step = execution_plan.get_step_by_key(string_join_key)
    assert len(string_mat_join_step.step_inputs) == 1
    join_source = string_mat_join_step.step_inputs[0].prev_output_handle
    assert join_source == StepOutputHandle.from_step(
        step=string_mat_step, output_name=MATERIALIZATION_THUNK_OUTPUT
    )
def test_execution_plan_source_step():
    """Execute a plan restricted to 'return_one.transform'.

    Exactly one step event is expected and its success value must be 1.
    """
    pipeline_def = define_two_int_pipeline()
    only_return_one = ExecutionPlanSubsetInfo.only_subset(
        included_step_keys=['return_one.transform']
    )
    execution_plan = create_execution_plan(pipeline_def, subset_info=only_return_one)

    step_events = execute_plan(execution_plan)

    assert len(step_events) == 1
    assert step_events[0].success_data.value == 1
def test_compute_noop_node():
    """Execute the single step planned for the ``noop`` solid.

    The plan must contain one step, and the first output produced by
    executing it must carry the value ``'foo'``.
    """
    pipeline = silencing_pipeline(solids=[noop])
    plan = create_execution_plan(pipeline)

    assert len(plan.steps) == 1

    sole_step = plan.steps[0]
    runtime_context = create_test_runtime_execution_context()
    outputs = list(execute_step(sole_step, runtime_context, {}))

    assert outputs[0].success_data.value == 'foo'
def test_execute_step_wrong_step_key():
    """Asking ``execute_plan`` for the unknown step key 'nope' must fail.

    ``DagsterExecutionStepNotFoundError`` is expected, with ``step_key`` set
    to the requested key and the message checked below.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(execution_plan, step_keys_to_execute=['nope'])

    raised = exc_info.value
    assert raised.step_key == 'nope'
    assert str(exc_info.value) == 'Execution plan does not contain step "nope"'
def test_topological_sort():
    """Check the step keys in each topological level of the diamond pipeline.

    Three levels are expected; the keys within every level must match
    ``expected_levels`` exactly, including their order.
    """
    expected_levels = [
        ['return_two.transform'],
        ['add_three.transform', 'mult_three.transform'],
        ['adder.transform'],
    ]

    plan = create_execution_plan(define_diamond_pipeline())
    levels = plan.topological_step_levels()

    assert len(levels) == 3

    for index, expected_keys in enumerate(expected_levels):
        assert [step.key for step in levels[index]] == expected_keys
def test_create_subplan_source_step():
    """Create a plan subset containing only 'return_one.transform'.

    The subset must hold exactly that one step, with no step inputs and one
    step output.

    NOTE(review): this file defines another function with this same name; if
    both live in one module, the later definition shadows the earlier one.
    """
    pipeline_def = define_two_int_pipeline()
    subset_info = ExecutionPlanSubsetInfo.only_subset(['return_one.transform'])
    subplan = create_execution_plan(pipeline_def, subset_info=subset_info)

    assert subplan
    assert len(subplan.steps) == 1

    source_step = subplan.steps[0]
    assert source_step.key == 'return_one.transform'
    assert not source_step.step_inputs
    assert len(source_step.step_outputs) == 1

    assert len(subplan.topological_steps()) == 1
def test_external_execution_step_for_output_missing():
    """Request output marshalling for the unknown step key 'nope'.

    ``execute_externalized_plan`` is expected to raise
    ``DagsterExecutionStepNotFoundError``.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['add_one.transform'],
            outputs_to_marshal={'nope': [MarshalledOutput('nope', 'nope')]},
            execution_metadata=ExecutionMetadata(),
        )
def create_plan(pipeline):
    """Turn *pipeline* plus the surrounding config into an 'ExecutionPlan' object.

    ``graphene_info`` and ``config`` are not parameters; they are read from
    an enclosing scope that is not visible in this block.

    The config is evaluated with ``_config_or_error_from_pipeline`` and the
    plan-building callback is handed to ``.chain`` on the result.
    NOTE(review): presumably ``chain`` runs the callback only when config
    evaluation succeeded and otherwise passes the error through; confirm.
    """

    def _build_graphene_plan(evaluate_value_result):
        # Resolve the schema type before building the plan, keeping the
        # evaluation order of the original single-expression callback.
        execution_plan_type = graphene_info.schema.type_named('ExecutionPlan')
        execution_plan = create_execution_plan(
            pipeline.get_dagster_pipeline(),
            evaluate_value_result.value,
            ExecutionMetadata(),
        )
        return execution_plan_type(pipeline, execution_plan)

    config_or_error = _config_or_error_from_pipeline(graphene_info, pipeline, config)
    return config_or_error.chain(_build_graphene_plan)
def test_execution_plan_middle_step():
    """Execute only 'add_one.transform' with its 'num' input injected as 2.

    Two step results are expected and the second must carry the value 3.
    """
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    subset_info = ExecutionPlanSubsetInfo(
        ['add_one.transform'], {'add_one.transform': {'num': 2}}
    )
    step_results = execute_plan(pipeline_def, execution_plan, subset_info=subset_info)

    assert len(step_results) == 2
    assert step_results[1].success_data.value == 3
def _start_execution(validated_config_either):
    """Create and store a run, validate the requested steps, then launch it.

    ``run_id``, ``pipeline``, ``pipeline_run_storage``, ``selector``,
    ``environment_dict``, ``reexecution_config``, ``step_keys_to_execute`` and
    ``graphene_info`` are not parameters; they are read from an enclosing
    scope that is not visible in this block.

    Returns one of these schema objects:
      * 'InvalidStepError' if a requested step key, or the step key of a
        re-execution output handle, is not in the execution plan;
      * 'InvalidOutputError' if a re-execution output handle names an output
        its step does not have;
      * 'StartPipelineExecutionSuccess' wrapping the 'PipelineRun' otherwise.

    NOTE(review): the run is added to ``pipeline_run_storage`` before any
    validation, so an invalid request returns an error but leaves the run in
    storage. Confirm whether that is intended.
    """
    new_run_id = run_id if run_id else make_new_run_id()
    execution_plan = create_execution_plan(
        pipeline.get_dagster_pipeline(), validated_config_either.value
    )

    run = pipeline_run_storage.create_run(
        new_run_id,
        selector,
        environment_dict,
        execution_plan,
        reexecution_config,
        step_keys_to_execute,
    )
    pipeline_run_storage.add_run(run)

    # Every explicitly requested step must exist in the plan.
    for step_key in step_keys_to_execute or ():
        if execution_plan.has_step(step_key):
            continue
        return graphene_info.schema.type_named('InvalidStepError')(invalid_step_key=step_key)

    # Every re-execution handle must name an existing step and output.
    if reexecution_config and reexecution_config.step_output_handles:
        for step_output_handle in reexecution_config.step_output_handles:
            handle_step_key = step_output_handle.step_key

            if not execution_plan.has_step(handle_step_key):
                return graphene_info.schema.type_named('InvalidStepError')(
                    invalid_step_key=handle_step_key
                )

            step = execution_plan.get_step_by_key(handle_step_key)
            if not step.has_step_output(step_output_handle.output_name):
                return graphene_info.schema.type_named('InvalidOutputError')(
                    step_key=handle_step_key,
                    invalid_output_name=step_output_handle.output_name,
                )

    request_context = graphene_info.context
    request_context.execution_manager.execute_pipeline(
        request_context.repository_container,
        pipeline.get_dagster_pipeline(),
        run,
        raise_on_error=request_context.raise_on_error,
    )

    success_type = graphene_info.schema.type_named('StartPipelineExecutionSuccess')
    pipeline_run_type = graphene_info.schema.type_named('PipelineRun')
    return success_type(run=pipeline_run_type(run))
def test_external_execution_output_code_error_throw_on_user_error():
    """Run 'user_throw_exception.transform' with ``throw_on_user_error=True``.

    An exception whose message is exactly ``'whoops'`` is expected to reach
    the caller.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(Exception) as exc_info:
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['user_throw_exception.transform'],
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    raised_message = str(exc_info.value)
    assert raised_message == 'whoops'
def test_external_execution_step_for_input_missing():
    """Request input marshalling for the unknown step key 'nope'.

    ``DagsterExecutionStepNotFoundError`` is expected, with ``step_key`` set
    to 'nope'.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    inputs_for_unknown_step = {'nope': {'nope': 'nope'}}

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['add_one.transform'],
            inputs_to_marshal=inputs_for_unknown_step,
            execution_metadata=ExecutionMetadata(),
        )

    assert exc_info.value.step_key == 'nope'
def test_using_file_system_for_subplan_missing_input():
    """Execute only 'add_one.transform' under a brand-new run id.

    Filesystem storage is configured and no other step is run under this run
    id first; ``DagsterStepOutputNotFoundError`` is expected.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())
    run_config = RunConfig(run_id=run_id)

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_one.transform'],
        )
def test_using_file_system_for_subplan_invalid_step():
    """Execute the unknown step key 'nope' with filesystem storage configured.

    ``DagsterExecutionStepNotFoundError`` is expected.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())
    run_config = RunConfig(run_id=run_id)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['nope'],
        )
def test_external_execution_marshal_output_code_error():
    """Marshal an output to a key under a directory assumed not to exist.

    With ``throw_on_user_error=True`` an ``IOError`` mentioning
    'No such file or directory' must propagate. With
    ``throw_on_user_error=False`` three step results are expected: both
    transform steps succeed and the marshal-output step fails.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    # Fixed UUID used as a folder name; the test relies on it not existing.
    hardcoded_uuid = '83fb4ace-5cab-459d-99b6-2ca9808c54a1'
    missing_dir_key = '{uuid}/{uuid}'.format(uuid=hardcoded_uuid)

    outputs_to_marshal = {
        'add_one.transform': [
            MarshalledOutput(output_name='result', marshalling_key=missing_dir_key)
        ]
    }
    step_keys = ['return_one.transform', 'add_one.transform']

    with pytest.raises(IOError) as exc_info:
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    assert 'No such file or directory' in str(exc_info.value)

    results = execute_externalized_plan(
        pipeline,
        execution_plan,
        step_keys,
        outputs_to_marshal=outputs_to_marshal,
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(results) == 3

    result_by_step_key = {result.step.key: result for result in results}

    assert result_by_step_key['return_one.transform'].success is True
    assert result_by_step_key['add_one.transform'].success is True
    assert result_by_step_key['add_one.transform.marshal-output.result'].success is False
def test_external_execution_output_missing():
    """Request marshalling of an output named 'nope' on 'add_one.transform'.

    ``execute_externalized_plan`` is expected to raise
    ``DagsterMarshalOutputNotFoundError``.
    """
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    unknown_output = {'output': 'nope', 'path': 'nope'}

    with pytest.raises(DagsterMarshalOutputNotFoundError):
        execute_externalized_plan(
            pipeline,
            execution_plan,
            ['add_one.transform'],
            outputs_to_marshal={'add_one.transform': [unknown_output]},
            execution_metadata=ExecutionMetadata(),
        )
def test_create_subplan_source_step():
    """Build a subplan containing only 'return_one.transform'.

    The subplan is created with ``create_subplan`` inside a ``yield_context``
    block and must hold exactly that one step, with no step inputs and one
    step output.

    NOTE(review): this file defines another function with this same name; if
    both live in one module, the later definition shadows the earlier one.
    """
    pipeline_def = define_two_int_pipeline()
    typed_environment = create_typed_environment(pipeline_def, None)
    execution_plan = create_execution_plan(pipeline_def)

    with yield_context(pipeline_def, typed_environment) as context:
        plan_info = ExecutionPlanInfo(
            context=context, pipeline=pipeline_def, environment=typed_environment
        )
        subset_info = ExecutionPlanSubsetInfo(['return_one.transform'])
        subplan = create_subplan(plan_info, execution_plan, subset_info)

        assert subplan
        assert len(subplan.steps) == 1

        source_step = subplan.steps[0]
        assert source_step.key == 'return_one.transform'
        assert not source_step.step_inputs
        assert len(source_step.step_outputs) == 1

        assert len(subplan.topological_steps()) == 1