Example #1
0
def test_external_execution_output_code_error():
    """Check that a failing output marshal raises DagsterMarshalOutputError.

    The 'result' output of 'add_one.transform' is marshalled to the integer
    23434 (presumably an invalid path, which is what triggers the failure).
    The raised error must carry the expected message, output name and step
    key.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    bad_outputs = {
        'add_one.transform': [{'output': 'result', 'path': 23434}],
    }

    with pytest.raises(DagsterMarshalOutputError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=bad_outputs,
            execution_metadata=ExecutionMetadata(),
        )

    expected_message = (
        'Error during the marshalling of output result in step add_one.transform'
    )
    assert str(exc_info.value) == expected_message
    assert exc_info.value.output_name == 'result'
    assert exc_info.value.step_key == 'add_one.transform'
Example #2
0
def test_external_execution_input_marshal_code_error():
    """Exercise a failing input unmarshal in both error-reporting modes.

    The 'num' input of 'add_one.transform' is unmarshalled from the key
    'nope'. With throw_on_user_error=True an IOError must propagate; with
    throw_on_user_error=False a single failed UNMARSHAL_INPUT step result
    wrapping an IOError must be returned instead.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    def run_plan(throw_on_user_error):
        # Same invocation for both modes; only the error-handling flag varies.
        return execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError):
        run_plan(True)

    results = run_plan(False)

    assert len(results) == 1
    marshal_result = results[0]
    assert marshal_result.success is False
    assert marshal_result.step.kind == StepKind.UNMARSHAL_INPUT

    user_exception = marshal_result.failure_data.dagster_error.user_exception
    assert isinstance(user_exception, IOError)
Example #3
0
def test_external_execution_step_for_output_missing():
    """Marshalling outputs for the step key 'nope' must raise
    DagsterExecutionStepNotFoundError."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    outputs_for_unknown_step = {'nope': [MarshalledOutput('nope', 'nope')]}

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=outputs_for_unknown_step,
            execution_metadata=ExecutionMetadata(),
        )
Example #4
0
def test_external_execution_output_code_error_throw_on_user_error():
    """With throw_on_user_error=True, executing 'user_throw_exception.transform'
    must raise an exception whose message is 'whoops'."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)
    step_keys = ['user_throw_exception.transform']

    with pytest.raises(Exception) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            step_keys,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    raised_message = str(exc_info.value)
    assert raised_message == 'whoops'
Example #5
0
def test_external_execution_step_for_input_missing():
    """Marshalling inputs for the step key 'nope' must raise
    DagsterExecutionStepNotFoundError reporting that step key."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    inputs_for_unknown_step = {'nope': {'nope': 'nope'}}

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=inputs_for_unknown_step,
            execution_metadata=ExecutionMetadata(),
        )

    assert exc_info.value.step_key == 'nope'
Example #6
0
def test_external_execution_marshal_output_code_error():
    """Exercise a failing output marshal in both error-reporting modes.

    The 'result' output of 'add_one.transform' is marshalled to a key built
    from a hardcoded uuid. With throw_on_user_error=True an IOError mentioning
    'No such file or directory' must propagate; with throw_on_user_error=False
    three step results come back, of which only the marshal-output step failed.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    # guaranteed that folder does not exist
    hardcoded_uuid = '83fb4ace-5cab-459d-99b6-2ca9808c54a1'
    missing_key = '{uuid}/{uuid}'.format(uuid=hardcoded_uuid)

    outputs_to_marshal = {
        'add_one.transform': [
            MarshalledOutput(output_name='result', marshalling_key=missing_key),
        ],
    }

    def run_plan(throw_on_user_error):
        # Same invocation for both modes; only the error-handling flag varies.
        return execute_externalized_plan(
            inty_pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError) as exc_info:
        run_plan(True)

    assert 'No such file or directory' in str(exc_info.value)

    results = run_plan(False)

    assert len(results) == 3

    # Index the step results by step key so each one can be checked by name.
    by_step_key = {}
    for step_result in results:
        by_step_key[step_result.step.key] = step_result

    assert by_step_key['return_one.transform'].success is True
    assert by_step_key['add_one.transform'].success is True

    marshal_step_result = by_step_key['add_one.transform.marshal-output.result']
    assert marshal_step_result.success is False
Example #7
0
def test_external_execution_output_missing():
    """Marshalling the output name 'nope' of 'add_one.transform' must raise
    DagsterMarshalOutputNotFoundError."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    unknown_output = {
        'add_one.transform': [{'output': 'nope', 'path': 'nope'}],
    }

    with pytest.raises(DagsterMarshalOutputNotFoundError):
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=unknown_output,
            execution_metadata=ExecutionMetadata(),
        )
Example #8
0
def test_external_execution_unsatsified_input_error():
    """Executing only 'add_one.transform' with no marshalled inputs must raise
    DagsterInvalidSubplanExecutionError describing the missing 'num' input."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    with pytest.raises(DagsterInvalidSubplanExecutionError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            execution_metadata=ExecutionMetadata(),
        )

    # Structured fields on the error.
    assert exc_info.value.pipeline_name == 'basic_external_plan_execution'
    assert exc_info.value.step_keys == ['add_one.transform']
    assert exc_info.value.step_key == 'add_one.transform'
    assert exc_info.value.input_name == 'num'

    # Human-readable message.
    expected_message = (
        "You have specified a subset execution on pipeline basic_external_plan_execution with "
        "step_keys ['add_one.transform']. You have failed to provide the required input num for "
        "step add_one.transform."
    )
    assert str(exc_info.value) == expected_message
Example #9
0
def test_external_execution_marshal_wrong_input_error():
    """Marshalling the input name 'nope' of 'add_one.transform' must raise
    DagsterUnmarshalInputNotFoundError with the expected message and fields."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    unknown_input = {'add_one.transform': {'nope': 'nope'}}

    with pytest.raises(DagsterUnmarshalInputNotFoundError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=unknown_input,
            execution_metadata=ExecutionMetadata(),
        )

    expected_message = 'Input nope does not exist in execution step add_one.transform'
    assert str(exc_info.value) == expected_message
    assert exc_info.value.input_name == 'nope'
    assert exc_info.value.step_key == 'add_one.transform'
Example #10
0
def test_external_execution_input_marshal_code_error():
    """Unmarshalling the 'num' input of 'add_one.transform' from the key 'nope'
    must raise DagsterUnmarshalInputError with the expected message and fields."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    bad_input = {'add_one.transform': {'num': 'nope'}}

    with pytest.raises(DagsterUnmarshalInputError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=bad_input,
            execution_metadata=ExecutionMetadata(),
        )

    expected_message = 'Error during the marshalling of input num in step add_one.transform'
    assert str(exc_info.value) == expected_message
    assert exc_info.value.input_name == 'num'
    assert exc_info.value.step_key == 'add_one.transform'
Example #11
0
def test_basic_pipeline_external_plan_execution():
    """Round-trip a value through an externally executed 'add_one.transform'.

    The integer 5 is serialized to one temp file and supplied as the 'num'
    input; the 'result' output is marshalled to a second temp file, which
    must deserialize to 6. Two step results are expected: a VALUE_THUNK
    followed by a successful TRANSFORM whose 'result' value is 6.
    """
    inty_pipeline = define_inty_pipeline()

    with get_temp_file_names(2) as temp_files:
        input_path, output_path = temp_files  # pylint: disable=W0632

        int_type = resolve_to_runtime_type(Int)
        strategy_in = int_type.serialization_strategy
        serialize_to_file(strategy_in, 5, input_path)

        plan = create_execution_plan(inty_pipeline)

        marshalled_inputs = {'add_one.transform': {'num': input_path}}
        marshalled_outputs = {
            'add_one.transform': [{'output': 'result', 'path': output_path}],
        }

        results = execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=marshalled_inputs,
            outputs_to_marshal=marshalled_outputs,
            execution_metadata=ExecutionMetadata(),
        )

        # Read the marshalled output back while the temp files are still open
        # to us inside the context manager.
        written_value = deserialize_from_file(
            int_type.serialization_strategy, output_path)
        assert written_value == 6

    assert len(results) == 2

    thunk_step_result = results[0]
    assert thunk_step_result.kind == StepKind.VALUE_THUNK

    transform_step_result = results[1]
    assert transform_step_result.kind == StepKind.TRANSFORM
    assert transform_step_result.success

    success_data = transform_step_result.success_data
    assert success_data.output_name == 'result'
    assert transform_step_result.success_data.value == 6
Example #12
0
def test_external_execution_output_code_error_no_throw_on_user_error():
    """With throw_on_user_error=False, executing 'user_throw_exception.transform'
    must return one step result whose failure data wraps a
    DagsterExecutionStepExecutionError with the user exception 'whoops'."""
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    results = execute_externalized_plan(
        inty_pipeline,
        plan,
        ['user_throw_exception.transform'],
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(results) == 1
    step_result = results[0]

    assert isinstance(
        step_result.failure_data.dagster_error,
        DagsterExecutionStepExecutionError,
    )

    user_exception = step_result.failure_data.dagster_error.user_exception
    assert str(user_exception) == 'whoops'
Example #13
0
def _execute_subplan_or_error(subplan_execution_args, dauphin_pipeline,
                              execution_plan, evaluate_value_result):
    """Run an externalized subplan and translate the outcome to a dauphin type.

    Collects the marshalled inputs and outputs requested by each step
    execution, validating that every named input/output exists on its step,
    then calls execute_externalized_plan.

    Returns:
        An EitherError wrapping 'StartSubplanExecutionInvalidInputError' or
        'StartSubplanExecutionInvalidOutputError' when a requested input or
        output is not on its step; an EitherError wrapping 'PythonError' or
        'InvalidSubplanExecutionError' when execution raises the matching
        dagster error; otherwise a 'StartSubplanExecutionSuccess' instance.
    """
    check.inst_param(subplan_execution_args, 'subplan_execution_args',
                     SubplanExecutionArgs)
    check.inst_param(dauphin_pipeline, 'dauphin_pipeline', DauphinPipeline)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(evaluate_value_result, 'evaluate_value_result',
                     EvaluateValueResult)

    schema = subplan_execution_args.info.schema

    # step key -> {input name: marshalling key}
    inputs_to_marshal = defaultdict(dict)
    # step key -> [{'output': output name, 'path': marshalling key}, ...]
    outputs_to_marshal = defaultdict(list)

    for step_execution in subplan_execution_args.step_executions:
        step = execution_plan.get_step_by_key(step_execution.step_key)

        for marshalled_input in step_execution.marshalled_inputs:
            input_name = marshalled_input.input_name
            if step.has_step_input(input_name):
                inputs_to_marshal[step.key][input_name] = marshalled_input.key
                continue

            invalid_input_type = schema.type_named(
                'StartSubplanExecutionInvalidInputError')
            return EitherError(
                invalid_input_type(
                    step=schema.type_named('ExecutionStep')(step),
                    invalid_input_name=marshalled_input.input_name,
                ))

        for marshalled_output in step_execution.marshalled_outputs:
            output_name = marshalled_output.output_name
            if step.has_step_output(output_name):
                outputs_to_marshal[step.key].append({
                    'output': output_name,
                    'path': marshalled_output.key,
                })
                continue

            invalid_output_type = schema.type_named(
                'StartSubplanExecutionInvalidOutputError')
            return EitherError(
                invalid_output_type(
                    step=schema.type_named('ExecutionStep')(step),
                    invalid_output_name=marshalled_output.output_name,
                ))

    try:
        # The step results are not used; only success vs. a raised error
        # decides what is returned.
        execute_externalized_plan(
            pipeline=dauphin_pipeline.get_dagster_pipeline(),
            execution_plan=execution_plan,
            step_keys=subplan_execution_args.step_keys,
            inputs_to_marshal=dict(inputs_to_marshal),
            outputs_to_marshal=outputs_to_marshal,
            environment=evaluate_value_result.value,
            execution_metadata=subplan_execution_args.execution_metadata,
        )

    except DagsterUserCodeExecutionError as ducee:
        error_info = serializable_error_info_from_exc_info(
            ducee.original_exc_info)
        return EitherError(schema.type_named('PythonError')(error_info))

    except DagsterInvalidSubplanExecutionError as invalid_subplan_error:
        invalid_subplan_type = schema.type_named('InvalidSubplanExecutionError')
        return EitherError(
            invalid_subplan_type(
                step=schema.type_named('ExecutionStep')(
                    execution_plan.get_step_by_key(
                        invalid_subplan_error.step_key)),
                missing_input_name=invalid_subplan_error.input_name,
            ))

    # TODO: Handle error conditions here

    success_type = schema.type_named('StartSubplanExecutionSuccess')
    return success_type(pipeline=dauphin_pipeline)
Example #14
0
def _execute_subplan_or_error(args, dauphin_pipeline, execution_plan,
                              evaluate_value_result):
    """Run an externalized subplan and translate the outcome to a dauphin type.

    Executes the plan with throw_on_user_error=False and converts every step
    result into a 'StepSuccessResult' or 'StepFailureResult'.

    Returns:
        A 'StartSubplanExecutionSuccess' instance carrying the converted step
        results and a has_failures flag, or an EitherError wrapping the
        dauphin error type that corresponds to the dagster error raised
        during execution.
    """
    check.inst_param(args, 'args', SubplanExecutionArgs)
    check.inst_param(dauphin_pipeline, 'dauphin_pipeline', DauphinPipeline)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(evaluate_value_result, 'evaluate_value_result',
                     EvaluateValueResult)

    # Shortcut: look up a dauphin type class by its graphql type name.
    type_of = args.info.schema.type_named

    def step_of(error):
        # Look up the execution step that an error object refers to.
        return execution_plan.get_step_by_key(error.step_key)

    try:
        outputs_by_step_key = {
            step_execution.step_key: step_execution.marshalled_outputs
            for step_execution in args.step_executions
        }

        results = execute_externalized_plan(
            pipeline=dauphin_pipeline.get_dagster_pipeline(),
            execution_plan=execution_plan,
            step_keys=args.step_keys,
            inputs_to_marshal=_get_inputs_to_marshal(args),
            outputs_to_marshal=outputs_by_step_key,
            environment=evaluate_value_result.value,
            execution_metadata=args.execution_metadata,
            throw_on_user_error=False,
        )

        has_failures = False
        dauphin_step_results = []
        for result in results:
            if result.success:
                dauphin_result = type_of('StepSuccessResult')(
                    success=result.success,
                    step=type_of('ExecutionStep')(result.step),
                    output_name=result.success_data.output_name,
                    value_repr=repr(result.success_data.value),
                )
            else:
                has_failures = True
                dauphin_result = type_of('StepFailureResult')(
                    success=result.success,
                    step=type_of('ExecutionStep')(result.step),
                    error_message=str(result.failure_data.dagster_error),
                )
            dauphin_step_results.append(dauphin_result)

        return type_of('StartSubplanExecutionSuccess')(
            pipeline=dauphin_pipeline,
            has_failures=has_failures,
            step_results=dauphin_step_results,
        )

    except DagsterInvalidSubplanExecutionError as invalid_subplan_error:
        invalid_subplan_type = type_of('InvalidSubplanExecutionError')
        return EitherError(
            invalid_subplan_type(
                step=type_of('ExecutionStep')(step_of(invalid_subplan_error)),
                missing_input_name=invalid_subplan_error.input_name,
            ))

    except DagsterUnmarshalInputNotFoundError as input_not_found_error:
        invalid_input_type = type_of('StartSubplanExecutionInvalidInputError')
        return EitherError(
            invalid_input_type(
                step=type_of('ExecutionStep')(step_of(input_not_found_error)),
                invalid_input_name=input_not_found_error.input_name,
            ))

    except DagsterMarshalOutputNotFoundError as output_not_found_error:
        invalid_output_type = type_of('StartSubplanExecutionInvalidOutputError')
        return EitherError(
            invalid_output_type(
                step=type_of('ExecutionStep')(step_of(output_not_found_error)),
                invalid_output_name=output_not_found_error.output_name,
            ))

    # https://github.com/dagster-io/dagster/issues/763
    # Once that issue is resolved, this handler should be removable.
    except DagsterUserCodeExecutionError as ducee:
        error_info = serializable_error_info_from_exc_info(
            ducee.original_exc_info)
        return EitherError(type_of('PythonError')(error_info))

    check.failed('Should not get here')