コード例 #1
0
def test_execution_plan_wrong_run_id():
    """Re-execution that names an unknown previous run id raises DagsterRunNotFoundError.

    The previous run id is a freshly generated UUID that this test never executes a
    run under. The raised error must report that id both in its message and in its
    ``invalid_run_id`` attribute.
    """
    addy_pipeline = define_addy_pipeline()

    # Generated here and never used for an actual run in this test.
    missing_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    plan = create_execution_plan(addy_pipeline, environment_dict=environment_dict)

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        reexecution = ReexecutionConfig(
            previous_run_id=missing_run_id,
            step_output_handles=[StepOutputHandle('add_one.transform')],
        )
        run_config = RunConfig(
            storage_mode=RunStorageMode.FILESYSTEM,
            reexecution_config=reexecution,
        )
        execute_plan(plan, environment_dict=environment_dict, run_config=run_config)

    raised = exc_info.value
    expected_message = (
        'Run id {} set as previous run id was not found in run storage'.format(missing_run_id)
    )

    assert str(raised) == expected_message
    assert raised.invalid_run_id == missing_run_id
コード例 #2
0
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing a step on top of an IN_MEMORY run is expected to raise.

    A first full run of the addy pipeline is executed with in-memory storage. A second
    run config then references that run's ``add_one.transform`` output, and executing
    only ``add_two.transform`` under it must raise DagsterInvariantViolationError.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    first_run_config = RunConfig(storage_mode=RunStorageMode.IN_MEMORY, run_id=first_run_id)
    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=environment_dict,
        run_config=first_run_config,
    )

    assert first_result.success

    # Attempt to re-execute add_two in a new run that points back at the first one.
    second_run_id = str(uuid.uuid4())
    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.transform')],
    )
    in_memory_run_config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        storage_mode=RunStorageMode.IN_MEMORY,
    )

    plan = create_execution_plan(addy_pipeline, environment_dict=environment_dict)

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            plan,
            environment_dict=environment_dict,
            run_config=in_memory_run_config,
            step_keys_to_execute=['add_two.transform'],
        )
コード例 #3
0
def test_using_s3_for_subplan(s3_bucket):
    """Execute the inty pipeline one step at a time with S3 storage configured.

    Each step is executed on its own under the same run id. After each execution the
    test checks that a step output event was produced and that the expected Int
    intermediate can be found and read back via the S3 helpers. The intermediates for
    both steps are removed in the ``finally`` clause whether or not the checks pass.

    Args:
        s3_bucket: Bucket name placed in the ``storage.s3.s3_bucket`` config entry
            and passed to the S3 intermediate helpers.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    run_id = str(uuid.uuid4())

    # Steps in the order they are executed, paired with the Int each must store.
    expected_intermediates = [('return_one.transform', 1), ('add_one.transform', 2)]

    try:
        for step_key, expected_value in expected_intermediates:
            step_events = list(
                execute_plan(
                    execution_plan,
                    environment_dict=environment_dict,
                    run_config=RunConfig(run_id=run_id),
                    step_keys_to_execute=[step_key],
                )
            )
            assert get_step_output(step_events, step_key)

            with yield_pipeline_execution_context(
                pipeline, environment_dict, RunConfig(run_id=run_id)
            ) as context:
                assert has_s3_intermediate(context, s3_bucket, run_id, step_key)
                stored_value = get_s3_intermediate(context, s3_bucket, run_id, step_key, Int)
                assert stored_value == expected_value
    finally:
        # Clean up both intermediates under a single context.
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            for step_key, _ in expected_intermediates:
                rm_s3_intermediate(context, s3_bucket, run_id, step_key)
コード例 #4
0
def test_execute_step_wrong_step_key():
    """Requesting a step key absent from the plan raises DagsterExecutionStepNotFoundError.

    The error must expose the offending key as ``step_key`` and name it in its message.
    """
    plan = create_execution_plan(define_inty_pipeline())

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(plan, step_keys_to_execute=['nope'])

    raised = exc_info.value
    assert raised.step_key == 'nope'
    assert str(raised) == 'Execution plan does not contain step "nope"'
コード例 #5
0
def test_using_file_system_for_subplan_invalid_step():
    """With filesystem storage configured, an unknown step key still raises.

    Executing the plan with ``step_keys_to_execute=['nope']`` must raise
    DagsterExecutionStepNotFoundError.
    """
    inty_pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    plan = create_execution_plan(inty_pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['nope'],
        )
コード例 #6
0
def test_using_file_system_for_subplan_missing_input():
    """Executing ``add_one.transform`` alone under a fresh run id must raise.

    No other step is executed under this run id beforehand, and the test expects
    DagsterStepOutputNotFoundError from the ``execute_plan`` call.
    """
    inty_pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    plan = create_execution_plan(inty_pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.transform'],
        )
コード例 #7
0
def test_using_file_system_for_subplan_multiprocessing():
    """Execute the inty pipeline step by step using the multiprocess executor config.

    Both steps run under the same run id with filesystem storage configured. After each
    step the test checks for a step output event and for the expected Int intermediate
    on the filesystem (1 for ``return_one.transform``, 2 for ``add_one.transform``).
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    run_id = str(uuid.uuid4())

    # Steps in the order they are executed, paired with the Int each must store.
    expected_intermediates = [('return_one.transform', 1), ('add_one.transform', 2)]

    for step_key, expected_value in expected_intermediates:
        # A fresh run config (and executor config) is built for every execution.
        multiprocess_run_config = RunConfig(
            run_id=run_id,
            executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
        )
        step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=multiprocess_run_config,
                step_keys_to_execute=[step_key],
            )
        )

        assert get_step_output(step_events, step_key)
        assert has_filesystem_intermediate(run_id, step_key)
        assert get_filesystem_intermediate(run_id, step_key, Int) == expected_value
コード例 #8
0
def test_execution_plan_source_step():
    """A plan subset containing only ``return_one.transform`` yields one event with value 1."""
    two_int_pipeline = define_two_int_pipeline()
    only_return_one = ExecutionPlanSubsetInfo.only_subset(
        included_step_keys=['return_one.transform']
    )
    plan = create_execution_plan(two_int_pipeline, subset_info=only_return_one)

    step_events = execute_plan(plan)

    assert len(step_events) == 1
    sole_event = step_events[0]
    assert sole_event.success_data.value == 1
コード例 #9
0
def test_execution_plan_wrong_invalid_output_name():
    """A reexecution handle naming an output the step does not have raises.

    After a full filesystem-backed run, a second run config references output
    ``not_an_output`` of ``add_one.transform``. Executing ``add_two.transform`` under
    it must raise DagsterStepOutputNotFoundError carrying the step key and output name.
    """
    addy_pipeline = define_addy_pipeline()

    previous_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    previous_result = execute_pipeline(
        addy_pipeline,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=previous_run_id),
    )

    rerun_id = str(uuid.uuid4())

    # The step key is the one used elsewhere in this test; only the output name is bogus.
    bogus_handle = StepOutputHandle('add_one.transform', 'not_an_output')
    run_config = RunConfig(
        run_id=rerun_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=previous_result.run_id,
            step_output_handles=[bogus_handle],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    plan = create_execution_plan(addy_pipeline, environment_dict=environment_dict)

    with pytest.raises(DagsterStepOutputNotFoundError) as exc_info:
        execute_plan(
            plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.transform'],
        )

    raised = exc_info.value
    expected_message = (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.transform does not have output not_an_output.'
    )

    assert str(raised) == expected_message
    assert raised.step_key == 'add_one.transform'
    assert raised.output_name == 'not_an_output'
コード例 #10
0
def test_execution_plan_middle_step():
    """Execute only ``add_one.transform`` with its ``num`` input supplied as 2.

    The test expects two step results, the second of which carries the value 3.
    """
    two_int_pipeline = define_two_int_pipeline()
    plan = create_execution_plan(two_int_pipeline)

    # Positional arguments: the included step keys, then per-step input values.
    injected_inputs = {'add_one.transform': {'num': 2}}
    subset = ExecutionPlanSubsetInfo(['add_one.transform'], injected_inputs)

    step_results = execute_plan(two_int_pipeline, plan, subset_info=subset)

    assert len(step_results) == 2
    assert step_results[1].success_data.value == 3
コード例 #11
0
def test_execution_plan_wrong_invalid_step_key():
    """A reexecution handle naming a step that is not in the plan raises.

    After a full filesystem-backed run, a second run config references step
    ``not_valid.transform`` from the previous run. Executing ``add_two.transform``
    under it must raise DagsterExecutionStepNotFoundError with the expected message.
    """
    addy_pipeline = define_addy_pipeline()

    previous_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    previous_result = execute_pipeline(
        addy_pipeline,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=previous_run_id),
    )

    rerun_id = str(uuid.uuid4())

    unknown_step_handle = StepOutputHandle('not_valid.transform')
    run_config = RunConfig(
        run_id=rerun_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=previous_result.run_id,
            step_output_handles=[unknown_step_handle],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    plan = create_execution_plan(addy_pipeline, environment_dict=environment_dict)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.transform'],
        )

    expected_message = (
        'Step not_valid.transform was specified as a step from a previous run. '
        'It does not exist.'
    )
    assert str(exc_info.value) == expected_message
コード例 #12
0
def test_reentrant_execute_plan():
    """Tags passed through ``execution_metadata`` are visible on the solid's context.

    The solid records that it ran in a dict shared with the test, so the test can
    confirm the in-solid assertions were actually executed.
    """
    invocations = {}

    @solid
    def has_tag(context):
        # Checked inside the solid body, where the context is available.
        assert context.has_tag('foo')
        assert context.get_tag('foo') == 'bar'
        invocations['yup'] = True

    tagged_pipeline = PipelineDefinition(name='has_tag_pipeline', solids=[has_tag])
    plan = create_execution_plan(tagged_pipeline)

    metadata = ExecutionMetadata(tags={'foo': 'bar'})
    step_events = execute_plan(plan, execution_metadata=metadata)

    assert invocations['yup']
    assert len(step_events) == 1
コード例 #13
0
def test_execution_plan_reexecution():
    """Re-execute only ``add_two.transform`` on top of a previous filesystem-backed run.

    The first run must leave intermediates 4 (``add_one.transform``) and 6
    (``add_two.transform``). The second run references the first run's ``add_one``
    output and executes only ``add_two``. Both intermediates must then be readable
    under the new run id, and only ``add_two`` may have a step output event.
    """
    addy_pipeline = define_addy_pipeline()

    first_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    first_result = execute_pipeline(
        addy_pipeline,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=first_run_id),
    )

    assert first_result.success
    assert get_filesystem_intermediate(first_result.run_id, 'add_one.transform', Int) == 4
    assert get_filesystem_intermediate(first_result.run_id, 'add_two.transform', Int) == 6

    # Second run: re-execute add_two only, reusing add_one's output from the first run.
    second_run_id = str(uuid.uuid4())
    reexecution = ReexecutionConfig(
        previous_run_id=first_result.run_id,
        step_output_handles=[StepOutputHandle('add_one.transform')],
    )
    run_config = RunConfig(
        run_id=second_run_id,
        reexecution_config=reexecution,
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    plan = create_execution_plan(addy_pipeline, environment_dict=environment_dict)

    step_events = execute_plan(
        plan,
        environment_dict=environment_dict,
        run_config=run_config,
        step_keys_to_execute=['add_two.transform'],
    )

    # Both intermediates must be readable under the new run id.
    assert get_filesystem_intermediate(second_run_id, 'add_one.transform', Int) == 4
    assert get_filesystem_intermediate(second_run_id, 'add_two.transform', Int) == 6

    # Only the re-executed step may have produced an output event.
    assert not get_step_output_event(step_events, 'add_one.transform')
    assert get_step_output_event(step_events, 'add_two.transform')
コード例 #14
0
def test_reentrant_execute_plan():
    """Context values supplied via ``ReentrantInfo`` are visible inside the solid.

    The solid records that it ran in a dict shared with the test, so the test can
    confirm the in-solid assertions were actually executed.
    """
    invocations = {}

    @solid
    def has_context_value(info):
        # Checked inside the solid body, where the context is available.
        assert info.context.has_context_value('foo')
        assert info.context.get_context_value('foo') == 'bar'
        invocations['yup'] = True

    context_value_pipeline = PipelineDefinition(
        name='has_context_value_pipeline',
        solids=[has_context_value],
    )
    plan = create_execution_plan(context_value_pipeline)

    reentrant = ReentrantInfo(context_stack={'foo': 'bar'})
    step_results = execute_plan(context_value_pipeline, plan, reentrant_info=reentrant)

    assert invocations['yup']
    assert len(step_results) == 1
コード例 #15
0
def test_reentrant_execute_plan():
    """Tags set on the ``RunConfig`` reach the solid's context and the emitted events.

    The solid records that it ran in a dict shared with the test. The test expects
    three step events and checks the ``foo`` tag on the first one.
    """
    invocations = {}

    @solid
    def has_tag(context):
        # Checked inside the solid body, where the context is available.
        assert context.has_tag('foo')
        assert context.get_tag('foo') == 'bar'
        invocations['yup'] = True

    tagged_pipeline = PipelineDefinition(name='has_tag_pipeline', solids=[has_tag])
    plan = create_execution_plan(tagged_pipeline)

    tagged_run_config = RunConfig(tags={'foo': 'bar'})
    step_events = execute_plan(plan, run_config=tagged_run_config)

    assert invocations['yup']
    assert len(step_events) == 3

    first_event = step_events[0]
    assert first_event.tags['foo'] == 'bar'
コード例 #16
0
def test_execution_plan_simple_two_steps():
    """The two-int pipeline plans into two steps that succeed with values 1 and 2."""
    two_int_pipeline = define_two_int_pipeline()
    plan = create_execution_plan(two_int_pipeline)

    assert isinstance(plan.steps, list)
    assert len(plan.steps) == 2

    for expected_key in ('return_one.transform', 'add_one.transform'):
        assert plan.get_step_by_key(expected_key)

    step_results = execute_plan(two_int_pipeline, plan)
    assert len(step_results) == 2

    return_one_result = step_results[0]
    assert return_one_result.step.key == 'return_one.transform'
    assert return_one_result.success
    assert return_one_result.success_data.value == 1

    add_one_result = step_results[1]
    assert add_one_result.step.key == 'add_one.transform'
    assert add_one_result.success
    assert add_one_result.success_data.value == 2
コード例 #17
0
def test_execution_plan_simple_two_steps():
    """The two-int pipeline plans into two steps and emits six events when executed.

    The events at indexes 1 and 4 are checked as the successful outputs of
    ``return_one.transform`` and ``add_one.transform``, with value reprs '1' and '2'.
    """
    two_int_pipeline = define_two_int_pipeline()
    plan = create_execution_plan(two_int_pipeline)

    assert isinstance(plan.steps, list)
    assert len(plan.steps) == 2

    for expected_key in ('return_one.transform', 'add_one.transform'):
        assert plan.get_step_by_key(expected_key)

    step_events = execute_plan(plan)
    # Per step, in order: start, output, success.
    assert len(step_events) == 6

    return_one_output = step_events[1]
    assert return_one_output.step_key == 'return_one.transform'
    assert return_one_output.is_successful_output
    assert return_one_output.step_output_data.value_repr == '1'

    add_one_output = step_events[4]
    assert add_one_output.step_key == 'add_one.transform'
    assert add_one_output.is_successful_output
    assert add_one_output.step_output_data.value_repr == '2'
コード例 #18
0
def test_execution_plan_two_outputs():
    """A solid yielding two named outputs produces one output event per output.

    The events at indexes 1 and 2 are checked for the step key, the value repr and
    the output name of ``num_one`` and ``num_two`` respectively.
    """
    two_int_outputs = [
        OutputDefinition(types.Int, 'num_one'),
        OutputDefinition(types.Int, 'num_two'),
    ]

    @solid(outputs=two_int_outputs)
    def return_one_two(_context):
        yield Result(1, 'num_one')
        yield Result(2, 'num_two')

    two_output_pipeline = PipelineDefinition(
        name='return_one_two_pipeline',
        solids=[return_one_two],
    )
    plan = create_execution_plan(two_output_pipeline)

    step_events = execute_plan(plan)

    num_one_event = step_events[1]
    assert num_one_event.step_key == 'return_one_two.transform'
    assert num_one_event.step_output_data.value_repr == '1'
    assert num_one_event.step_output_data.output_name == 'num_one'

    num_two_event = step_events[2]
    assert num_two_event.step_key == 'return_one_two.transform'
    assert num_two_event.step_output_data.value_repr == '2'
    assert num_two_event.step_output_data.output_name == 'num_two'
コード例 #19
0
def test_execution_plan_two_outputs():
    """A solid yielding two named outputs produces one step result per output.

    The results at indexes 0 and 1 are checked for the step key, the value and the
    output name of ``num_one`` and ``num_two`` respectively.
    """
    two_int_outputs = [
        OutputDefinition(types.Int, 'num_one'),
        OutputDefinition(types.Int, 'num_two'),
    ]

    @solid(outputs=two_int_outputs)
    def return_one_two(_info):
        yield Result(1, 'num_one')
        yield Result(2, 'num_two')

    two_output_pipeline = PipelineDefinition(
        name='return_one_two_pipeline',
        solids=[return_one_two],
    )
    plan = create_execution_plan(two_output_pipeline)

    step_results = execute_plan(two_output_pipeline, plan)

    # FIXME: we should change this to be *single* result with two outputs
    num_one_result = step_results[0]
    assert num_one_result.step.key == 'return_one_two.transform'
    assert num_one_result.success_data.value == 1
    assert num_one_result.success_data.output_name == 'num_one'

    num_two_result = step_results[1]
    assert num_two_result.step.key == 'return_one_two.transform'
    assert num_two_result.success_data.value == 2
    assert num_two_result.success_data.output_name == 'num_two'
コード例 #20
0
def _execute_plan_chain_actual_execute_or_error(execute_plan_args,
                                                dauphin_pipeline,
                                                _evaluate_env_config_result):
    """Create the execution plan, run the requested steps and wrap the outcome.

    Args:
        execute_plan_args: Request arguments. This function reads its
            ``execution_metadata``, ``environment_dict``, ``step_keys`` and
            ``graphene_info`` attributes.
        dauphin_pipeline: Object providing ``get_dagster_pipeline()``. It is also
            passed through unchanged as ``pipeline`` on the success payload.
        _evaluate_env_config_result: Not used by this function.

    Returns:
        An ``InvalidStepError`` schema object for the first requested step key that
        the plan does not contain, otherwise an ``ExecutePlanSuccess`` schema object
        holding the converted step events and a ``has_failures`` flag.
    """
    graphql_execution_metadata = execute_plan_args.execution_metadata
    run_id = graphql_execution_metadata.get('runId')
    tags = tags_from_graphql_execution_metadata(graphql_execution_metadata)
    execution_plan = create_execution_plan(
        pipeline=dauphin_pipeline.get_dagster_pipeline(),
        environment_dict=execute_plan_args.environment_dict,
    )

    # Validate every requested step key before executing anything. A falsy
    # ``step_keys`` (None or empty) means there is nothing to validate.
    for step_key in execute_plan_args.step_keys or []:
        if not execution_plan.has_step(step_key):
            return execute_plan_args.graphene_info.schema.type_named(
                'InvalidStepError')(invalid_step_key=step_key)

    run_config = RunConfig(run_id=run_id, tags=tags)

    # Materialize the events up front: they are traversed twice below.
    step_events = list(
        execute_plan(
            execution_plan=execution_plan,
            environment_dict=execute_plan_args.environment_dict,
            run_config=run_config,
            step_keys_to_execute=execute_plan_args.step_keys,
        ))

    # Test the predicate itself rather than the truthiness of each matching event.
    has_failures = any(
        step_event.event_type == DagsterEventType.STEP_FAILURE
        for step_event in step_events)

    dauphin_step_events = [
        _create_dauphin_step_event(execution_plan, step_event)
        for step_event in step_events
    ]

    return execute_plan_args.graphene_info.schema.type_named(
        'ExecutePlanSuccess')(
            pipeline=dauphin_pipeline,
            has_failures=has_failures,
            step_events=dauphin_step_events,
        )