def test_external_execution_input_marshal_code_error():
    """Marshalling input ``num`` from ``'nope'`` is expected to fail with IOError.

    With ``throw_on_user_error=True`` the IOError must propagate out of
    ``execute_externalized_plan``; with ``throw_on_user_error=False`` it must
    be reported as a single failed UNMARSHAL_INPUT step result.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    def run_add_one(throw_on_user_error):
        # Arguments are rebuilt on every call so both runs start from fresh
        # input dicts and fresh ExecutionMetadata.
        return execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError):
        run_add_one(True)

    step_results = run_add_one(False)
    assert len(step_results) == 1

    failed_unmarshal = step_results[0]
    assert failed_unmarshal.success is False
    assert failed_unmarshal.step.kind == StepKind.UNMARSHAL_INPUT

    user_exception = failed_unmarshal.failure_data.dagster_error.user_exception
    assert isinstance(user_exception, IOError)
def to_metadata(self):
    """Build an ExecutionMetadata from this object's ``runId`` and ``tags``.

    When ``self.tags`` is truthy, each entry is read as a mapping with
    ``'key'`` and ``'value'`` items and flattened into a plain dict; otherwise
    an empty dict is passed.

    Returns:
        ExecutionMetadata constructed with ``run_id=self.runId`` and the
        flattened ``tags`` dict.
    """
    tags = {}
    # Test the instance's tags. The local dict is always empty at this point,
    # so guarding on it would skip the loop and silently drop every tag.
    if self.tags:
        for tag in self.tags:  # pylint: disable=E1133
            tags[tag['key']] = tag['value']
    return ExecutionMetadata(run_id=self.runId, tags=tags)
def test_external_execution_output_code_error():
    """Marshalling output ``result`` to the non-string path 23434 must raise.

    The raised DagsterMarshalOutputError is expected to name the output and
    the step it belongs to.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    bad_outputs = {'add_one.transform': [{'output': 'result', 'path': 23434}]}

    with pytest.raises(DagsterMarshalOutputError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=bad_outputs,
            execution_metadata=ExecutionMetadata(),
        )

    expected_message = 'Error during the marshalling of output result in step add_one.transform'
    assert str(raised.value) == expected_message
    assert raised.value.output_name == 'result'
    assert raised.value.step_key == 'add_one.transform'
def test_external_execution_marshal_output_code_error():
    """Marshalling an output into a missing folder fails as a user error.

    With ``throw_on_user_error=True`` an IOError mentioning
    'No such file or directory' must propagate. With
    ``throw_on_user_error=False`` three step results are expected: both
    transforms succeed and only the marshal-output step fails.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    # guaranteed that folder does not exist
    hardcoded_uuid = '83fb4ace-5cab-459d-99b6-2ca9808c54a1'
    missing_key = '{uuid}/{uuid}'.format(uuid=hardcoded_uuid)

    outputs_to_marshal = {
        'add_one.transform': [
            MarshalledOutput(output_name='result', marshalling_key=missing_key),
        ],
    }

    def run_both_steps(throw_on_user_error):
        return execute_externalized_plan(
            pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError) as raised:
        run_both_steps(True)

    assert 'No such file or directory' in str(raised.value)

    step_results = run_both_steps(False)
    assert len(step_results) == 3

    by_step_key = {}
    for step_result in step_results:
        by_step_key[step_result.step.key] = step_result

    assert by_step_key['return_one.transform'].success is True
    assert by_step_key['add_one.transform'].success is True
    assert by_step_key['add_one.transform.marshal-output.result'].success is False
def test_external_execution_output_code_error_throw_on_user_error():
    """With ``throw_on_user_error=True`` the user's exception propagates.

    Running ``user_throw_exception.transform`` must raise an exception whose
    message is exactly 'whoops'.
    """
    pipeline = define_inty_pipeline()
    step_keys = ['user_throw_exception.transform']

    with pytest.raises(Exception) as raised:
        execute_marshalling(
            pipeline,
            step_keys,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    message = str(raised.value)
    assert message == 'whoops'
def test_external_execution_step_for_output_missing():
    """Requesting outputs for the unknown step key 'nope' must raise.

    The expected error is DagsterExecutionStepNotFoundError.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    outputs_for_unknown_step = {'nope': [MarshalledOutput('nope', 'nope')]}

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=outputs_for_unknown_step,
            execution_metadata=ExecutionMetadata(),
        )
def create_test_pipeline_execution_context(loggers=None, resources=None, tags=None):
    """Construct a pipeline execution context for use in tests.

    A random UUID string is used as the run id, and the context is bound to
    an empty pipeline definition named 'test_legacy_context'.

    Args:
        loggers: forwarded to ExecutionMetadata.
        resources: forwarded to construct_pipeline_execution_context.
        tags: forwarded to ExecutionMetadata.

    Returns:
        Whatever construct_pipeline_execution_context returns.
    """
    run_id = str(uuid.uuid4())
    empty_pipeline = PipelineDefinition(name='test_legacy_context', solids=[])

    metadata = ExecutionMetadata(run_id, tags=tags, loggers=loggers)
    default_context = ExecutionContext()
    environment_config = create_environment_config(empty_pipeline)

    return construct_pipeline_execution_context(
        execution_metadata=metadata,
        pipeline=empty_pipeline,
        execution_context=default_context,
        resources=resources,
        environment_config=environment_config,
    )
def test_external_execution_unsatisfied_input_error():
    """Executing ``add_one.transform`` alone, without its input, must raise.

    The DagsterInvalidSubplanMissingInputError is expected to carry the
    pipeline name, the requested step keys, the offending step and input
    name, and a fully spelled-out message.
    """
    pipeline = define_inty_pipeline()
    requested_steps = ['add_one.transform']

    with pytest.raises(DagsterInvalidSubplanMissingInputError) as raised:
        execute_marshalling(pipeline, requested_steps, execution_metadata=ExecutionMetadata())

    error = raised.value
    assert error.pipeline_name == 'basic_external_plan_execution'
    assert error.step_keys == ['add_one.transform']
    assert error.step.key == 'add_one.transform'
    assert error.input_name == 'num'

    expected_message = (
        "You have specified a subset execution on pipeline basic_external_plan_execution with "
        "step_keys ['add_one.transform']. You have failed to provide the required input num for "
        "step add_one.transform."
    )
    assert str(error) == expected_message
def test_external_execution_output_code_error_no_throw_on_user_error():
    """With ``throw_on_user_error=False`` the user error is reported, not raised.

    ``execute_marshalling`` must return exactly one step event whose failure
    data wraps the user's 'whoops' exception in a
    DagsterExecutionStepExecutionError.
    """
    pipeline = define_inty_pipeline()

    events = execute_marshalling(
        pipeline,
        ['user_throw_exception.transform'],
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(events) == 1

    failed_event = events[0]
    assert isinstance(failed_event.failure_data.dagster_error, DagsterExecutionStepExecutionError)

    user_message = str(failed_event.failure_data.dagster_error.user_exception)
    assert user_message == 'whoops'
def test_external_execution_step_for_input_missing():
    """Supplying inputs for the unknown step key 'nope' must raise.

    The DagsterExecutionStepNotFoundError is expected to report 'nope' as its
    ``step_key``.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    inputs_for_unknown_step = {'nope': {'nope': 'nope'}}

    with pytest.raises(DagsterExecutionStepNotFoundError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=inputs_for_unknown_step,
            execution_metadata=ExecutionMetadata(),
        )

    missing_step_key = raised.value.step_key
    assert missing_step_key == 'nope'
def test_external_execution_output_missing():
    """Marshalling the unknown output 'nope' of ``add_one.transform`` must raise.

    The expected error is DagsterMarshalOutputNotFoundError.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    unknown_output_request = {
        'add_one.transform': [{'output': 'nope', 'path': 'nope'}],
    }

    with pytest.raises(DagsterMarshalOutputNotFoundError):
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=unknown_output_request,
            execution_metadata=ExecutionMetadata(),
        )
def test_basic_pipeline_external_plan_execution():
    """Run ``add_one.transform`` with a marshalled input and output.

    The value 5 is serialized to a temp file and supplied as input ``num``;
    the step's ``result`` output is marshalled to a second temp file, which
    must then deserialize to 6. Two step results are expected: a VALUE_THUNK
    followed by a successful TRANSFORM yielding 6.
    """
    pipeline = define_inty_pipeline()

    with get_temp_file_names(2) as temp_files:
        read_path, write_path = temp_files  # pylint: disable=W0632

        int_type = resolve_to_runtime_type(Int)
        serialize_to_file(int_type.serialization_strategy, 5, read_path)

        plan = create_execution_plan(pipeline)

        marshalled_inputs = {'add_one.transform': {'num': read_path}}
        marshalled_outputs = {
            'add_one.transform': [{'output': 'result', 'path': write_path}],
        }

        step_results = execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=marshalled_inputs,
            outputs_to_marshal=marshalled_outputs,
            execution_metadata=ExecutionMetadata(),
        )

        written_value = deserialize_from_file(int_type.serialization_strategy, write_path)
        assert written_value == 6

        assert len(step_results) == 2

        thunk_result = step_results[0]
        assert thunk_result.kind == StepKind.VALUE_THUNK

        transform_result = step_results[1]
        assert transform_result.kind == StepKind.TRANSFORM
        assert transform_result.success
        assert transform_result.success_data.output_name == 'result'
        assert transform_result.success_data.value == 6
def test_external_execution_output_code_error_no_throw_on_user_error():
    """With ``throw_on_user_error=False`` the user error is reported, not raised.

    ``execute_externalized_plan`` must return exactly one step result whose
    failure data wraps the user's 'whoops' exception in a
    DagsterExecutionStepExecutionError.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)

    step_results = execute_externalized_plan(
        pipeline,
        plan,
        ['user_throw_exception.transform'],
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(step_results) == 1

    failed_result = step_results[0]
    assert isinstance(failed_result.failure_data.dagster_error, DagsterExecutionStepExecutionError)

    user_message = str(failed_result.failure_data.dagster_error.user_exception)
    assert user_message == 'whoops'
def test_external_execution_marshal_wrong_input_error():
    """Supplying the unknown input 'nope' for ``add_one.transform`` must raise.

    The DagsterUnmarshalInputNotFoundError is expected to name the input and
    the step in both its message and its attributes.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    unknown_input_request = {'add_one.transform': {'nope': 'nope'}}

    with pytest.raises(DagsterUnmarshalInputNotFoundError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=unknown_input_request,
            execution_metadata=ExecutionMetadata(),
        )

    error = raised.value
    assert str(error) == 'Input nope does not exist in execution step add_one.transform'
    assert error.input_name == 'nope'
    assert error.step_key == 'add_one.transform'
def test_external_execution_input_marshal_code_error():
    """Marshalling input ``num`` from ``'nope'`` must raise.

    The DagsterUnmarshalInputError is expected to name the input and the step
    in both its message and its attributes.
    """
    pipeline = define_inty_pipeline()
    plan = create_execution_plan(pipeline)
    unreadable_input = {'add_one.transform': {'num': 'nope'}}

    with pytest.raises(DagsterUnmarshalInputError) as raised:
        execute_externalized_plan(
            pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal=unreadable_input,
            execution_metadata=ExecutionMetadata(),
        )

    error = raised.value
    expected_message = 'Error during the marshalling of input num in step add_one.transform'
    assert str(error) == expected_message
    assert error.input_name == 'num'
    assert error.step_key == 'add_one.transform'
def _read_s3_payload(s3, bucket, key):
    """Fetch the object at ``bucket``/``key`` and return its deserialized body."""
    # NOTE(review): `deserialize` is defined outside this block; if it is
    # pickle-based, the bucket contents must be trusted -- confirm.
    s3_object = s3.get_object(Bucket=bucket, Key=key)
    return deserialize(s3_object['Body'].read())


def aws_lambda_handler(event, _context):
    """The lambda handler function.

    Executes a single step: loads the intermediate results, resources and
    step body from S3, checks that every step input is covered, runs the step,
    records each resulting event in the intermediate results, and uploads the
    updated intermediate results back to S3.

    Args:
        event: invocation event; ``event['config']`` is unpacked positionally
            into a LambdaInvocationPayload.
        _context: unused.

    Raises:
        Exception: if ``_all_inputs_covered`` reports that the step's inputs
            are not all present in the intermediate results.
    """
    logger.setLevel(logging.INFO)

    (
        run_id,
        step_idx,
        key,
        s3_bucket,
        s3_key_inputs,
        s3_key_body,
        s3_key_resources,
        s3_key_outputs,
    ) = LambdaInvocationPayload(*event['config'])

    s3 = boto3.client('s3')

    logger.info(
        'Beginning execution of lambda function for run_id %s step %s (%s)', run_id, step_idx, key
    )

    logger.info('Looking for inputs at %s/%s', s3_bucket, s3_key_inputs)
    intermediate_results = _read_s3_payload(s3, s3_bucket, s3_key_inputs)

    logger.info('Looking for resources at %s/%s', s3_bucket, s3_key_resources)
    resources = _read_s3_payload(s3, s3_bucket, s3_key_resources)

    # The step body arrives already constructed, so only a placeholder
    # pipeline definition is supplied to build the execution context.
    execution_context = construct_pipeline_execution_context(
        pipeline=PipelineDefinition(name='dummy', solids=[]),
        execution_metadata=ExecutionMetadata(run_id=run_id),
        execution_context=ExecutionContext(loggers=[logger]),
        resources=resources,
        environment_config={},
    )

    logger.info('Looking for step body at %s/%s', s3_bucket, s3_key_body)
    step = _read_s3_payload(s3, s3_bucket, s3_key_body)

    logger.info('Checking inputs')
    if not _all_inputs_covered(step, intermediate_results):
        result_keys = set(intermediate_results.keys())
        expected_outputs = [ni.prev_output_handle for ni in step.step_inputs]

        logger.error(
            'Not all inputs covered for %s. Not executing.\nKeys in result: %s'
            '\nOutputs needed for inputs %s',
            key,
            result_keys,
            expected_outputs,
        )
        # Same exception type as before, but with a message so the failure is
        # identifiable from the traceback alone.
        raise Exception('Not all inputs covered for {key}. Not executing.'.format(key=key))

    logger.info('Constructing input values')
    input_values = {}
    for step_input in step.step_inputs:
        prev_output_handle = step_input.prev_output_handle
        handle = (prev_output_handle.step.key, prev_output_handle.output_name)
        # FIXME - we need a less hacky strategy for serializing and deserializing input handles and
        # result values -- the subscript below is like accessing .success_data on the namedtuple
        # in the simple engine
        input_values[step_input.name] = intermediate_results[handle][1].value

    # Lazy %-style arguments, consistent with every other logging call here.
    logger.info('Executing step %s', key)

    step_events = list(iterate_step_events_for_step(step, execution_context, input_values))

    for step_event in step_events:
        check.invariant(isinstance(step_event, ExecutionStepEvent))

        # NOTE(review): success_data is read unconditionally; if a failed
        # event carries no success_data this raises AttributeError -- confirm
        # how failures should be keyed before changing.
        output_name = step_event.success_data.output_name
        output_handle = (step.key, output_name)

        # Stored as a (success, success_data, failure_data) tuple; index 1 is
        # what the input-construction loop above reads back.
        intermediate_results[output_handle] = (
            step_event.success,
            step_event.success_data,
            step_event.failure_data,
        )
        logger.info('Processing result: %s', output_name)

    logger.info('Uploading intermediate_results to %s', s3_key_outputs)
    # NOTE(review): 'public-read' makes the uploaded intermediate results
    # publicly readable -- confirm this is intended.
    s3.put_object(
        ACL='public-read',
        Body=serialize(intermediate_results),
        Bucket=s3_bucket,
        Key=s3_key_outputs,
    )