Exemple #1
0
def _store_output(
    step_context: SystemStepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
) -> Iterator[DagsterEvent]:
    """Hand a step output to its IO manager and emit the resulting events.

    Args:
        step_context: Execution context of the step that produced ``output``.
        step_output_handle: Handle naming the output being stored.
        output: Output wrapper whose ``value`` is passed to ``handle_output``.

    Yields:
        The events built by ``_materializations_to_events`` from whatever
        ``handle_output`` returned, followed by one ``handled_output`` event.
    """
    output_def = step_context.solid_def.output_def_named(
        step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    # Only the IO manager call runs inside the boundary; ``Failure`` and
    # ``RetryRequested`` are declared as control-flow exceptions.
    with user_code_error_boundary(
            DagsterExecutionHandleOutputError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=lambda: (
                "Error occurred during the handling of step output:"
                f'    step key: "{step_context.step.key}"'
                f'    output name: "{output_context.name}"'),
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(
            output_context, output.value)

    for evt in _materializations_to_events(step_context, step_output_handle,
                                           materializations):
        yield evt

    # The default event message is replaced only when the value went through
    # an IntermediateStorageAdapter.
    if isinstance(output_manager, IntermediateStorageAdapter):
        message_override = (
            f'Handled output "{step_output_handle.output_name}" '
            "using intermediate storage")
    else:
        message_override = None

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=message_override,
    )
def _user_event_sequence_for_step_compute_fn(step_context, evaluated_inputs):
    '''Yield every event produced by the step's compute function.

    Args:
        step_context (SystemStepExecutionContext): context of the running step.
        evaluated_inputs (dict): input values keyed by (string) input name.

    Yields:
        Each item produced by ``step_context.step.compute_fn``; nothing is
        yielded when ``check.opt_generator`` returns ``None``.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, 'evaluated_inputs', key_type=str)

    def _error_message():
        # Evaluated lazily by the error boundary.
        return '''Error occurred during the execution of step:
        step key: "{key}"
        solid invocation: "{solid}"
        solid definition: "{solid_def}"
        '''.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    with user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            msg_fn=_error_message,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
    ):

        gen = check.opt_generator(
            step_context.step.compute_fn(step_context, evaluated_inputs))

        if gen is not None:
            for event in gen:
                yield event
Exemple #3
0
def get_external_sensor_execution(recon_repo, instance_ref, sensor_name,
                                  last_completion_timestamp, last_run_key,
                                  cursor):
    """Evaluate one tick of a sensor and return its result or serialized error.

    Args:
        recon_repo (ReconstructableRepository): repository holding the sensor.
        instance_ref: passed as the first argument to ``SensorEvaluationContext``.
        sensor_name: name used to look up the sensor definition.
        last_completion_timestamp: forwarded as ``last_completion_time``.
        last_run_key: forwarded to the evaluation context.
        cursor: forwarded to the evaluation context.

    Returns:
        Whatever ``sensor_def.evaluate_tick`` returns, or an
        ``ExternalSensorExecutionErrorData`` describing the exception raised
        while evaluating the tick.
    """
    check.inst_param(
        recon_repo,
        "recon_repo",
        ReconstructableRepository,
    )

    definition = recon_repo.get_definition()
    sensor_def = definition.get_sensor_def(sensor_name)

    with SensorEvaluationContext(
            instance_ref,
            last_completion_time=last_completion_timestamp,
            last_run_key=last_run_key,
            cursor=cursor,
            # Reuse the definition loaded above rather than resolving the
            # repository a second time just to read its name.
            repository_name=definition.name,
    ) as sensor_context:
        try:
            with user_code_error_boundary(
                    SensorExecutionError,
                    lambda:
                    "Error occurred during the execution of evaluation_fn for sensor "
                    "{sensor_name}".format(sensor_name=sensor_def.name),
            ):
                return sensor_def.evaluate_tick(sensor_context)
        except Exception:
            # Intentionally broad: any failure during evaluation is reported
            # back to the caller as serialized error data instead of raising.
            return ExternalSensorExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info()))
def _set_objects(step_context, step_output, step_output_handle, output):
    """Pass a step output to its output manager and emit the resulting events.

    Args:
        step_context: execution context of the step that produced ``output``.
        step_output: step output whose ``output_def`` supplies the manager key.
        step_output_handle: handle naming the output being stored.
        output: output wrapper whose ``value`` is passed to ``handle_output``.

    Yields:
        The events built by ``_materializations_to_events`` from whatever
        ``handle_output`` returned, followed by one ``handled_output`` event.
    """
    output_def = step_output.output_def
    output_manager = step_context.get_output_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    # Only the manager call runs inside the boundary; ``Failure`` and
    # ``RetryRequested`` are declared as control-flow exceptions.
    with user_code_error_boundary(
            DagsterExecutionHandleOutputError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=lambda: (
                "Error occurred during the handling of step output:"
                f'    step key: "{step_context.step.key}"'
                f'    output name: "{output_context.name}"'),
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(
            output_context, output.value)

    for evt in _materializations_to_events(step_context, step_output_handle,
                                           materializations):
        yield evt

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
    )
Exemple #5
0
    def ExternalPartitionTags(self, request, _context):
        """Build an ``ExternalPartitionTagsReply`` for one partition.

        The request's ``serialized_partition_args`` must deserialize to a
        ``PartitionArgs``. The reply carries either the serialized tags for the
        partition or, when a ``PartitionExecutionError`` is raised while
        computing them, the serialized error information.
        """
        args = deserialize_json_to_dagster_namedtuple(
            request.serialized_partition_args)
        check.inst_param(args, 'partition_args', PartitionArgs)

        repo_def = self._recon_repository_from_origin(
            args.repository_origin).get_definition()
        partition_set_def = repo_def.get_partition_set_def(
            args.partition_set_name)
        partition = partition_set_def.get_partition(args.partition_name)

        def _describe_failure():
            # Evaluated lazily by the error boundary.
            return (
                'Error occurred during the evaluation of the `tags_for_partition` function for '
                'partition set {partition_set_name}'.format(
                    partition_set_name=partition_set_def.name))

        try:
            with user_code_error_boundary(PartitionExecutionError,
                                          _describe_failure):
                tags = partition_set_def.tags_for_partition(partition)
                payload = ExternalPartitionTagsData(name=partition.name,
                                                    tags=tags)
                return api_pb2.ExternalPartitionTagsReply(
                    serialized_external_partition_tags_or_external_partition_execution_error
                    =serialize_dagster_namedtuple(payload))
        except PartitionExecutionError:
            error_payload = ExternalPartitionExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info()))
            return api_pb2.ExternalPartitionTagsReply(
                serialized_external_partition_tags_or_external_partition_execution_error
                =serialize_dagster_namedtuple(error_payload))
Exemple #6
0
def _user_event_sequence_for_step_compute_fn(
        step_context: SystemStepExecutionContext,
        evaluated_inputs: Dict[str, Any]) -> Iterator[SolidOutputUnion]:
    """Yield the events of the solid's compute function.

    The generator returned by ``execute_core_compute`` is driven through
    ``iterate_with_context``, which is given a factory that creates a
    ``user_code_error_boundary`` for ``DagsterExecutionStepExecutionError``.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _describe_failure() -> str:
        # Evaluated lazily by the error boundary.
        return """Error occurred during the execution of step:
            step key: "{key}"
            solid invocation: "{solid}"
            solid definition: "{solid_def}"
            """.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    def _make_boundary():
        # Called by iterate_with_context to obtain the context manager.
        return user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=_describe_failure,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
        )

    compute_events = execute_core_compute(
        step_context.for_compute(),
        evaluated_inputs,
        step_context.solid_def.compute_fn,
    )

    for event in iterate_with_context(_make_boundary, compute_events):
        yield event
Exemple #7
0
def _user_event_sequence_for_step_compute_fn(step_context, evaluated_inputs):
    """Yield the events of the step's compute function inside an error boundary.

    Nothing is yielded when ``check.opt_generator`` gives back a falsy value.
    Otherwise the generator is driven through ``iterate_with_context`` using
    ``raise_interrupts_immediately``.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        return """Error occurred during the execution of step:
        step key: "{key}"
        solid invocation: "{solid}"
        solid definition: "{solid_def}"
        """.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    boundary = user_code_error_boundary(
        DagsterExecutionStepExecutionError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=_describe_failure,
        step_key=step_context.step.key,
        solid_def_name=step_context.solid_def.name,
        solid_name=step_context.solid.name,
    )

    with boundary:
        compute_events = check.opt_generator(
            step_context.step.compute_fn(step_context, evaluated_inputs))

        if compute_events:
            # Allow interrupts again during each step of the execution
            for event in iterate_with_context(raise_interrupts_immediately,
                                              compute_events):
                yield event
Exemple #8
0
def partition_data_command(args):
    '''Compute run config and tags for one partition of a partition set.

    Args:
        args (PartitionApiCommandArgs): repository origin, partition set name
            and partition name to evaluate.

    Returns:
        ExternalPartitionData holding ``tags`` and ``run_config`` on success, or
        holding ``error`` when a ``PartitionScheduleExecutionError`` is raised.
    '''
    check.inst_param(args, 'args', PartitionApiCommandArgs)

    repo_def = recon_repository_from_origin(
        args.repository_origin).get_definition()
    partition_set_def = repo_def.get_partition_set_def(args.partition_set_name)
    partition = partition_set_def.get_partition(args.partition_name)

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        return (
            'Error occurred during the execution of user-provided partition functions for '
            'partition set {partition_set_name}'.format(
                partition_set_name=partition_set_def.name))

    try:
        with user_code_error_boundary(PartitionScheduleExecutionError,
                                      _describe_failure):
            run_config = partition_set_def.environment_dict_for_partition(
                partition)
            tags = partition_set_def.tags_for_partition(partition)
            return ExternalPartitionData(
                name=partition.name,
                tags=tags,
                run_config=run_config,
            )
    except PartitionScheduleExecutionError:
        return ExternalPartitionData(
            name=partition.name,
            error=serializable_error_info_from_exc_info(sys.exc_info()),
        )
def _iterate_step_outputs_within_boundary(step_context, evaluated_inputs):
    '''Yield every output produced by the step's compute function.

    Args:
        step_context (SystemStepExecutionContext): context of the running step.
        evaluated_inputs (dict): input values keyed by (string) input name.

    Yields:
        Each item produced by ``step_context.step.compute_fn``; nothing is
        yielded when ``check.opt_generator`` returns ``None``.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, 'evaluated_inputs', key_type=str)

    # Built eagerly: this call site hands the boundary a ready-made string.
    error_str = '''Error occurred during the execution of step:
    step key: "{key}"
    solid instance: "{solid}"
    solid definition: "{solid_def}"
    '''.format(
        key=step_context.step.key,
        solid_def=step_context.solid_def.name,
        solid=step_context.solid.name,
    )

    with user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            error_str,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
    ):
        gen = check.opt_generator(
            step_context.step.compute_fn(step_context, evaluated_inputs))

        if gen is not None:
            for step_output in gen:
                yield step_output
Exemple #10
0
def get_external_sensor_execution(
    recon_repo, instance_ref, sensor_name, last_completion_timestamp, last_run_key
):
    """Run a sensor's evaluation and return external execution data.

    Returns:
        ``ExternalSensorExecutionData`` built from the sensor's execution data,
        or ``ExternalSensorExecutionErrorData`` when a ``SensorExecutionError``
        is raised during evaluation.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    sensor_def = recon_repo.get_definition().get_sensor_def(sensor_name)

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        return (
            "Error occurred during the execution of evaluation_fn for sensor "
            "{sensor_name}".format(sensor_name=sensor_def.name)
        )

    with DagsterInstance.from_ref(instance_ref) as instance:
        sensor_context = SensorExecutionContext(
            instance,
            last_completion_time=last_completion_timestamp,
            last_run_key=last_run_key,
        )

        try:
            with user_code_error_boundary(SensorExecutionError, _describe_failure):
                execution_data = sensor_def.get_execution_data(sensor_context)
                return ExternalSensorExecutionData.from_execution_data(execution_data)
        except SensorExecutionError:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            return ExternalSensorExecutionErrorData(error_info)
Exemple #11
0
def _get_mapped_solids_dict(composite_def, current_stack,
                            current_solid_config):
    '''Apply a composite's config mapping function and validate its result.

    The mapping function receives the dictionary found under ``config`` for the
    composite solid:

        solid_name:
           config: {dict_passed_to_user}

    and its return value is treated as the dictionary rooted at ``solids``:

        solid_name:
           solids: {return_value_of_config_fn}

    The returned mapping is then processed against a config type constructed
    from the composite's child solids.

    Returns:
        The ``value`` of the config processing result.
    '''
    describe_error = _get_error_lambda(current_stack)

    # Step 1: run the user's mapping function inside the error boundary.
    with user_code_error_boundary(DagsterConfigMappingFunctionError,
                                  describe_error):
        config_fn = composite_def.config_mapping.config_fn
        mapped_solids_config = config_fn(
            current_solid_config.get('config', {}))

    # Step 2: dynamically build the type the mapped config is checked against.
    child_schema = define_solid_dictionary_cls(
        composite_def.solids,
        composite_def.dependency_structure,
        current_stack.handle,
    )

    # Step 3: process the mapped config against that type.
    evr = process_config(child_schema, mapped_solids_config)

    if not evr.success:
        raise_composite_descent_config_error(
            current_stack, mapped_solids_config, evr)

    return evr.value
Exemple #12
0
def _user_event_sequence_for_step_compute_fn(
        step_context: StepExecutionContext,
        evaluated_inputs: Dict[str, Any]) -> Iterator[SolidOutputUnion]:
    """Yield the events of the solid's compute function.

    The generator returned by ``execute_core_compute`` is driven through
    ``iterate_with_context``, which is given a factory that creates a
    ``user_code_error_boundary`` for ``DagsterExecutionStepExecutionError``.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _describe_failure() -> str:
        # Evaluated lazily by the error boundary.
        return f'Error occurred while executing solid "{step_context.solid.name}":'

    def _make_boundary():
        # Called by iterate_with_context to obtain the context manager.
        return user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=_describe_failure,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
        )

    compute_events = execute_core_compute(
        step_context,
        evaluated_inputs,
        step_context.solid_def.compute_fn,
    )

    for event in iterate_with_context(_make_boundary, compute_events):
        yield event
Exemple #13
0
def user_code_context_manager(user_fn, error_cls, msg_fn):
    '''Generator that yields the single item produced by ``user_fn``.

    ``user_fn`` is called inside ``user_code_error_boundary(error_cls, msg_fn)``
    and its result is passed through ``ensure_gen``. Exactly one item must come
    out of it: a check failure is raised when there is none, and an invariant
    check fails when a second item follows the first.
    '''
    check.callable_param(user_fn, 'user_fn')
    check.subclass_param(error_cls, 'error_cls', DagsterUserCodeExecutionError)

    with user_code_error_boundary(error_cls, msg_fn):
        gen = ensure_gen(user_fn())

        try:
            thing = next(gen)
        except StopIteration:
            check.failed('Must yield one item. You did not yield anything.')

        yield thing

        # Advance once more; the generator is expected to be finished now.
        try:
            next(gen)
        except StopIteration:
            exhausted = True
        else:
            exhausted = False

        check.invariant(exhausted,
                        'Must yield one item. Yielded more than one item')
Exemple #14
0
def _type_checked_event_sequence_for_input(
        step_context: StepExecutionContext, input_name: str,
        input_value: Any) -> Iterator[DagsterEvent]:
    """Type-check one step input and report the outcome as an event.

    Yields:
        A single step-input event carrying the type check and its success flag.

    Raises:
        DagsterTypeCheckDidNotPass: after the event has been yielded, when the
            type check was not successful.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.str_param(input_name, "input_name")

    step_input = step_context.step.step_input_named(input_name)
    dagster_type = step_input.source.get_input_def(
        step_context.pipeline_def).dagster_type

    def _describe_failure() -> str:
        # Evaluated lazily by the error boundary.
        return (
            f'Error occurred while type-checking input "{input_name}" of solid '
            f'"{str(step_context.step.solid_handle)}", with Python type {type(input_value)} and '
            f"Dagster type {dagster_type.display_name}")

    with user_code_error_boundary(DagsterTypeCheckError, _describe_failure):
        type_check = do_type_check(
            step_context.for_type(dagster_type),
            dagster_type,
            input_value,
        )

    yield _create_step_input_event(
        step_context,
        input_name,
        type_check=type_check,
        success=type_check.success,
    )

    if type_check.success:
        return

    failure_description = (
        f'Type check failed for step input "{input_name}" - '
        f'expected type "{dagster_type.display_name}". '
        f"Description: {type_check.description}.")
    raise DagsterTypeCheckDidNotPass(
        description=failure_description,
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
Exemple #15
0
def get_external_schedule_execution(external_schedule_execution_args):
    '''Compute the run config of a schedule.

    Args:
        external_schedule_execution_args (ExternalScheduleExecutionArgs):
            repository origin, schedule name and instance ref to use.

    Returns:
        ExternalScheduleExecutionData with the run config, or
        ExternalScheduleExecutionErrorData when a ``ScheduleExecutionError``
        is raised while computing it.
    '''
    check.inst_param(
        external_schedule_execution_args,
        'external_schedule_execution_args',
        ExternalScheduleExecutionArgs,
    )
    args = external_schedule_execution_args

    repo_def = recon_repository_from_origin(
        args.repository_origin).get_definition()
    schedule_def = repo_def.get_schedule_def(args.schedule_name)
    schedule_context = ScheduleExecutionContext(
        DagsterInstance.from_ref(args.instance_ref))

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        return (
            'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name))

    try:
        with user_code_error_boundary(ScheduleExecutionError,
                                      _describe_failure):
            return ExternalScheduleExecutionData(
                run_config=schedule_def.get_run_config(schedule_context))
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
Exemple #16
0
    def ExternalPartitionNames(self, request, _context):
        """Build an ``ExternalPartitionNamesReply`` for a partition set.

        The request's ``serialized_partition_names_args`` must deserialize to a
        ``PartitionNamesArgs``. The reply carries either the serialized
        partition names or, when a ``PartitionExecutionError`` is raised while
        producing them, the serialized error information.
        """
        args = deserialize_json_to_dagster_namedtuple(
            request.serialized_partition_names_args
        )
        check.inst_param(args, "partition_names_args", PartitionNamesArgs)

        repo_def = self._recon_repository_from_origin(
            args.repository_origin
        ).get_definition()
        partition_set_def = repo_def.get_partition_set_def(args.partition_set_name)

        def _describe_failure():
            # Evaluated lazily by the error boundary.
            return (
                "Error occurred during the execution of the partition generation function for "
                "partition set {partition_set_name}".format(
                    partition_set_name=partition_set_def.name
                )
            )

        try:
            with user_code_error_boundary(PartitionExecutionError, _describe_failure):
                names_data = ExternalPartitionNamesData(
                    partition_names=partition_set_def.get_partition_names()
                )
                return api_pb2.ExternalPartitionNamesReply(
                    serialized_external_partition_names_or_external_partition_execution_error=serialize_dagster_namedtuple(
                        names_data
                    )
                )
        except PartitionExecutionError:
            error_data = ExternalPartitionExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info())
            )
            return api_pb2.ExternalPartitionNamesReply(
                serialized_external_partition_names_or_external_partition_execution_error=serialize_dagster_namedtuple(
                    error_data
                )
            )
Exemple #17
0
def get_external_schedule_execution(
    recon_repo,
    instance_ref,
    schedule_name,
    scheduled_execution_timestamp,
    scheduled_execution_timezone,
):
    """Evaluate a schedule and return external execution data.

    When ``scheduled_execution_timestamp`` is truthy it is converted with
    ``pendulum.from_timestamp`` using ``scheduled_execution_timezone``;
    otherwise the schedule context receives ``None`` as the execution time.

    Returns:
        ``ExternalScheduleExecutionData`` built from the schedule's execution
        data, or ``ExternalScheduleExecutionErrorData`` when a
        ``ScheduleExecutionError`` is raised during evaluation.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    schedule_def = recon_repo.get_definition().get_schedule_def(schedule_name)

    if scheduled_execution_timestamp:
        scheduled_execution_time = pendulum.from_timestamp(
            scheduled_execution_timestamp,
            tz=scheduled_execution_timezone,
        )
    else:
        scheduled_execution_time = None

    schedule_context = ScheduleExecutionContext(instance_ref,
                                                scheduled_execution_time)

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        return ("Error occurred during the execution function for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name))

    try:
        with user_code_error_boundary(ScheduleExecutionError,
                                      _describe_failure):
            execution_data = schedule_def.get_execution_data(schedule_context)
            return ExternalScheduleExecutionData.from_execution_data(
                execution_data)
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
Exemple #18
0
def _type_checked_step_output_event_sequence(step_context, output):
    '''Type-check a step output and report the outcome as an event.

    Yields:
        A step-output event with ``success=True`` when the type check ran
        without raising. If anything raises inside the boundary, a step-output
        event with ``success=False`` is yielded and the exception is re-raised.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(output, 'output', Output)

    step_output = step_context.step.step_output_named(output.output_name)

    def _describe_failure():
        # Evaluated lazily by the error boundary.
        template = (
            'In solid "{handle}" the output "{output_name}" received '
            'value {output_value} of Python type {output_type} which '
            'does not pass the typecheck for Dagster type '
            '{dagster_type_name}. Step {step_key}.'
        )
        return template.format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=step_output.runtime_type.name,
            step_key=step_context.step.key,
        )

    try:
        with user_code_error_boundary(DagsterTypeCheckError, _describe_failure):
            passed_check = _do_type_check(step_output.runtime_type, output.value)
            yield _create_step_output_event(
                step_context, output, type_check=passed_check, success=True
            )
    except Exception as failure:  # pylint: disable=broad-except
        failed_check = _type_check_from_failure(failure)
        yield _create_step_output_event(
            step_context, output, type_check=failed_check, success=False
        )

        raise failure
Exemple #19
0
def single_resource_event_generator(context, resource_name, resource_def):
    """Initialize one resource, yield it, then finish the resource's iterator.

    Args:
        context: passed to ``resource_fn`` when that function accepts a
            context; its ``log`` attribute is given to the error boundaries.
        resource_name: name used in error messages.
        resource_def: definition whose ``resource_fn`` creates the resource.

    Yields:
        A single ``InitializedResource`` wrapping the created resource, the
        formatted initialization duration and the ``is_gen`` flag.
    """
    try:
        # Lazily-built message shared by both error boundaries in this function.
        msg_fn = lambda: "Error executing resource_fn on ResourceDefinition {name}".format(
            name=resource_name)
        with user_code_error_boundary(DagsterResourceFunctionError,
                                      msg_fn,
                                      log_manager=context.log):
            try:
                with time_execution_scope() as timer_result:
                    # Call resource_fn with the context only if its signature
                    # accepts one; otherwise call it without arguments.
                    resource_or_gen = (resource_def.resource_fn(context)
                                       if is_context_provided(
                                           get_function_params(
                                               resource_def.resource_fn)) else
                                       resource_def.resource_fn())

                    # Flag for whether resource is generator. This is used to ensure that teardown
                    # occurs when resources are initialized out of execution.
                    is_gen = inspect.isgenerator(
                        resource_or_gen) or isinstance(resource_or_gen,
                                                       ContextDecorator)

                    resource_iter = _wrapped_resource_iterator(resource_or_gen)
                    # First (and only expected) item is the resource itself.
                    resource = next(resource_iter)
                # Wrapped after the timing scope so the measured duration is available.
                resource = InitializedResource(
                    resource, format_duration(timer_result.millis), is_gen)
            except StopIteration:
                check.failed(
                    "Resource generator {name} must yield one item.".format(
                        name=resource_name))

        # Yielded outside the boundary but still inside the outer try.
        yield resource

    except DagsterUserCodeExecutionError as dagster_user_error:
        # Re-raised unchanged.
        raise dagster_user_error

    # Runs once the consumer resumes this generator: advance the resource
    # iterator one more time (presumably to let a generator-based resource run
    # its teardown code -- confirm against _wrapped_resource_iterator).
    with user_code_error_boundary(DagsterResourceFunctionError,
                                  msg_fn,
                                  log_manager=context.log):
        try:
            next(resource_iter)
        except StopIteration:
            # Expected: the iterator had exactly one item.
            pass
        else:
            check.failed(
                "Resource generator {name} yielded more than one item.".format(
                    name=resource_name))
Exemple #20
0
 def _invoke_user_config_fn(self, processed_config):
     """Run the user config function on the ``config`` entry and re-wrap the result.

     The function receives ``processed_config["config"]`` (or an empty dict when
     that key is absent) and is called inside an error boundary for
     ``DagsterConfigMappingFunctionError``. Returns ``{"config": <result>}``.
     """
     describe_error = _get_user_code_error_str_lambda(self.parent_def)
     with user_code_error_boundary(DagsterConfigMappingFunctionError,
                                   describe_error):
         mapped_config = self._config_fn(processed_config.get("config", {}))
         return {"config": mapped_config}
Exemple #21
0
 def load_input_object(self, step_context):
     """Build this input's value from its config data using the type's loader.

     The loader call runs inside an error boundary for
     ``DagsterTypeLoadingError``; the boundary's message is produced by
     ``_generate_error_boundary_msg_for_step_input``.
     """
     describe_error = _generate_error_boundary_msg_for_step_input(
         step_context, self.input_name)
     with user_code_error_boundary(DagsterTypeLoadingError,
                                   msg_fn=describe_error):
         loader = self.dagster_type.loader
         return loader.construct_from_config_value(step_context,
                                                   self.config_data)
Exemple #22
0
def get_partition_set_execution_param_data(args):
    """Compute run config and tags for the requested partitions of a set.

    Args:
        args (PartitionSetExecutionParamArgs): repository origin, partition set
            name and the partition names to evaluate.

    Returns:
        ``ExternalPartitionSetExecutionParamData`` with one entry per selected
        partition, or ``ExternalPartitionExecutionErrorData`` when a
        ``PartitionExecutionError`` is raised along the way.
    """
    check.inst_param(args, "args", PartitionSetExecutionParamArgs)

    repo_definition = recon_repository_from_origin(args.repository_origin).get_definition()
    partition_set_def = repo_definition.get_partition_set_def(args.partition_set_name)

    def _generation_error_message():
        # Evaluated lazily by the error boundary.
        return (
            "Error occurred during the partition generation for partition set "
            "{partition_set_name}".format(partition_set_name=partition_set_def.name)
        )

    def _error_message_fn(partition_set_name, partition_name):
        # Bind both names now so each boundary reports its own partition.
        def _message():
            return (
                "Error occurred during the partition config and tag generation for "
                "partition set {partition_set_name}::{partition_name}".format(
                    partition_set_name=partition_set_name, partition_name=partition_name
                )
            )

        return _message

    try:
        with user_code_error_boundary(PartitionExecutionError, _generation_error_message):
            all_partitions = partition_set_def.get_partitions()

        selected = [
            candidate for candidate in all_partitions if candidate.name in args.partition_names
        ]

        partition_data = []
        for partition in selected:
            describe_error = _error_message_fn(partition_set_def.name, partition.name)
            with user_code_error_boundary(PartitionExecutionError, describe_error):
                run_config = partition_set_def.run_config_for_partition(partition)
                tags = partition_set_def.tags_for_partition(partition)

            entry = ExternalPartitionExecutionParamData(
                name=partition.name, tags=tags, run_config=run_config,
            )
            partition_data.append(entry)

        return ExternalPartitionSetExecutionParamData(partition_data=partition_data)

    except PartitionExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalPartitionExecutionErrorData(error_info)
Exemple #23
0
def get_partition_set_execution_param_data(recon_repo, partition_set_name,
                                           partition_names):
    """Compute run config and tags for the requested partitions of a set.

    Args:
        recon_repo: object whose ``get_definition()`` provides the repository.
        partition_set_name: name of the partition set to look up.
        partition_names: container of partition names to evaluate.

    Returns:
        ``ExternalPartitionSetExecutionParamData`` with one entry per selected
        partition, or ``ExternalPartitionExecutionErrorData`` when a
        ``PartitionExecutionError`` is raised along the way.
    """
    partition_set_def = recon_repo.get_definition().get_partition_set_def(
        partition_set_name)

    def _generation_error_message():
        # Evaluated lazily by the error boundary.
        return (
            "Error occurred during the partition generation for "
            f"{_get_target_for_partition_execution_error(partition_set_def)}")

    def _error_message_fn(partition_name):
        # Bind the name now so each boundary reports its own partition.
        def _message():
            return (
                "Error occurred during the partition config and tag generation for "
                f"'{partition_name}' in {_get_target_for_partition_execution_error(partition_set_def)}"
            )

        return _message

    try:
        with user_code_error_boundary(PartitionExecutionError,
                                      _generation_error_message):
            all_partitions = partition_set_def.get_partitions()

        selected = [
            candidate for candidate in all_partitions
            if candidate.name in partition_names
        ]

        partition_data = []
        for partition in selected:
            describe_error = _error_message_fn(partition.name)
            with user_code_error_boundary(PartitionExecutionError,
                                          describe_error):
                run_config = partition_set_def.run_config_for_partition(
                    partition)
                tags = partition_set_def.tags_for_partition(partition)

            entry = ExternalPartitionExecutionParamData(
                name=partition.name,
                tags=tags,
                run_config=run_config,
            )
            partition_data.append(entry)

        return ExternalPartitionSetExecutionParamData(
            partition_data=partition_data)

    except PartitionExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalPartitionExecutionErrorData(error_info)
Exemple #24
0
def _input_values_from_intermediates_manager(step_context):
    '''Resolve a value for every non-Nothing input of the current step.

    Inputs fed by several upstream outputs are read one intermediate per source
    handle, inputs fed by a single output are read directly, and all remaining
    inputs are built from config through the type's input hydration config.

    Args:
        step_context: execution context whose ``step.step_inputs`` are resolved.

    Returns:
        dict: mapping of input name to the resolved value.
    '''

    def _generate_error_boundary_msg_for_step_input(_context, _input):
        # Defined once instead of on every loop iteration; the returned
        # callable is evaluated lazily by the error boundary.
        return lambda: '''Error occurred during input hydration:
                input name: "{input}"
                step key: "{key}"
                solid invocation: "{solid}"
                solid definition: "{solid_def}"
                '''.format(
            input=_input.name,
            key=_context.step.key,
            solid_def=_context.solid_def.name,
            solid=_context.solid.name,
        )

    step = step_context.step

    input_values = {}
    for step_input in step.step_inputs:
        if step_input.runtime_type.is_nothing:
            continue

        if step_input.is_from_multiple_outputs:
            if hasattr(step_input.runtime_type, 'inner_type'):
                runtime_type = step_input.runtime_type.inner_type
            else:  # This is the case where the fan-in is typed Any
                runtime_type = step_input.runtime_type
            _input_value = [
                step_context.intermediates_manager.get_intermediate(
                    step_context, runtime_type, source_handle)
                for source_handle in step_input.source_handles
            ]
            # When we're using an object store-backed intermediate store, we wrap the
            # ObjectStoreOperation[] representing the fan-in values in a MultipleStepOutputsListWrapper
            # so we can yield the relevant object store events and unpack the values in the caller
            if all(isinstance(x, ObjectStoreOperation) for x in _input_value):
                input_value = MultipleStepOutputsListWrapper(_input_value)
            else:
                input_value = _input_value

        elif step_input.is_from_single_output:
            input_value = step_context.intermediates_manager.get_intermediate(
                step_context, step_input.runtime_type,
                step_input.source_handles[0])

        else:  # is from config
            with user_code_error_boundary(
                    DagsterInputHydrationConfigError,
                    msg_fn=_generate_error_boundary_msg_for_step_input(
                        step_context, step_input),
            ):
                input_value = step_input.runtime_type.input_hydration_config.construct_from_config_value(
                    step_context, step_input.config_data)
        input_values[step_input.name] = input_value

    return input_values
Exemple #25
0
def _create_type_materializations(step_context: SystemStepExecutionContext,
                                  output_name: str,
                                  value: Any) -> Iterator[DagsterEvent]:
    """Run the configured dagster type materializers for one step output.

    Starting at the step's solid handle and moving to each parent handle in
    turn, the solid config for that handle is looked up in the environment
    config. Every type materializer spec keyed by ``output_name`` causes the
    output's dagster type materializer to be invoked (inside a user-code error
    boundary), and one step-materialization event is yielded per result.

    Args:
        step_context: Execution context of the step that produced the output.
        output_name: Name of the output whose materializers should run.
        value: The runtime value handed to the materializer.

    Yields:
        A ``DagsterEvent`` for each materialization the materializer returns.

    Raises:
        DagsterInvariantViolationError: If the materializer returns an item
            that is neither an ``AssetMaterialization`` nor a
            ``Materialization``.
    """

    def _boundary_message():
        # Built lazily: the boundary only calls this when it needs the text.
        template = """Error occurred during output materialization:
                    output name: "{output_name}"
                    step key: "{key}"
                    solid invocation: "{solid}"
                    solid definition: "{solid_def}"
                    """
        return template.format(
            output_name=output_name,
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    step = step_context.step
    handle = step.solid_handle

    # Output mappings may be configured at any level of the composition
    # hierarchy, so visit this handle and then every ancestor.
    while handle:
        solid_config = step_context.environment_config.solids.get(
            handle.to_string())
        handle = handle.parent

        if solid_config is None:
            continue

        for spec_entry in solid_config.outputs.type_materializer_specs:
            # Each spec is a single-item mapping: {output name: config}.
            check.invariant(len(spec_entry) == 1)
            config_output_name, materializer_config = next(
                iter(spec_entry.items()))
            if config_output_name != output_name:
                continue

            step_output = step.step_output_named(output_name)
            with user_code_error_boundary(DagsterTypeMaterializationError,
                                          msg_fn=_boundary_message):
                output_def = step_context.solid_def.output_def_named(
                    step_output.name)
                dagster_type = output_def.dagster_type
                materializer = dagster_type.materializer
                materializations = materializer.materialize_runtime_values(
                    step_context, materializer_config, value)

            for materialization in materializations:
                if isinstance(materialization,
                              (AssetMaterialization, Materialization)):
                    yield DagsterEvent.step_materialization(
                        step_context, materialization)
                    continue

                raise DagsterInvariantViolationError((
                    "materialize_runtime_values on type {type_name} has returned "
                    "value {value} of type {python_type}. You must return an "
                    "AssetMaterialization.").format(
                        type_name=dagster_type.display_name,
                        value=repr(materialization),
                        python_type=type(materialization).__name__,
                    ))
Exemple #26
0
def _type_check_output(
    step_context: SystemStepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Any,
    version: Optional[str],
) -> Iterator[DagsterEvent]:
    """Type check a step output and emit the corresponding step-output event.

    The output's value is checked against the dagster type declared on the
    solid's output definition. A step-output event carrying the type check
    result is always yielded; afterwards a failed check raises.

    Args:
        step_context: Execution context of the step producing the output.
        step_output_handle: Handle identifying the output being recorded.
        output: The ``Output`` or ``DynamicOutput`` yielded by user code.
        version: Version string attached to the step output data, if any.

    Yields:
        A single step-output ``DagsterEvent``.

    Raises:
        DagsterTypeCheckDidNotPass: If the type check reports failure.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    step_output = step_context.step.step_output_named(output.output_name)
    dagster_type = step_context.solid_def.output_def_named(
        step_output.name).dagster_type

    def _boundary_message():
        return ('In solid "{handle}" the output "{output_name}" received '
                "value {output_value} of Python type {output_type} which "
                "does not pass the typecheck for Dagster type "
                "{dagster_type_name}. Step {step_key}.").format(
                    handle=str(step_context.step.solid_handle),
                    output_name=output.output_name,
                    output_value=output.value,
                    output_type=type(output.value),
                    dagster_type_name=dagster_type.display_name,
                    step_key=step_context.step.key,
                )

    # Only the type check itself runs inside the user-code error boundary.
    with user_code_error_boundary(DagsterTypeCheckError, _boundary_message):
        type_check = _do_type_check(step_context.for_type(dagster_type),
                                    dagster_type, output.value)

    # Fall back to empty description/metadata when the result is falsy.
    if type_check:
        check_description = type_check.description
        check_metadata = type_check.metadata_entries
    else:
        check_description = None
        check_metadata = []

    type_check_data = TypeCheckData(
        success=type_check.success,
        label=step_output_handle.output_name,
        description=check_description,
        metadata_entries=check_metadata,
    )
    step_output_data = StepOutputData(
        step_output_handle=step_output_handle,
        type_check_data=type_check_data,
        version=version,
    )
    yield DagsterEvent.step_output_event(
        step_context=step_context,
        step_output_data=step_output_data,
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description=
        'Type check failed for step output "{output_name}" - expected type "{dagster_type}".'
        .format(
            output_name=output.output_name,
            dagster_type=dagster_type.display_name,
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
Exemple #27
0
def _type_check_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Any,
    version: Optional[str],
) -> Iterator[DagsterEvent]:
    """Type check a step output and emit the corresponding step-output event.

    The output's value is checked against the dagster type declared on the
    output definition. A step-output event carrying the type check result and
    the output's ``MetadataEntry`` items is always yielded; afterwards a
    failed check raises.

    Args:
        step_context: Execution context of the step producing the output.
        step_output_handle: Handle identifying the output being recorded.
        output: The ``Output`` or ``DynamicOutput`` yielded by user code.
        version: Version string attached to the step output data, if any.

    Yields:
        A single step-output ``DagsterEvent``.

    Raises:
        DagsterTypeCheckDidNotPass: If the type check reports failure.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    step_output = step_context.step.step_output_named(output.output_name)
    dagster_type = step_context.solid_def.output_def_named(
        step_output.name).dagster_type

    # Resolved up front so the lazily-built error message can reuse them.
    type_check_context = step_context.for_type(dagster_type)
    op_label = step_context.describe_op()
    output_type = type(output.value)

    def _boundary_message():
        return (
            f'Error occurred while type-checking output "{output.output_name}" of {op_label}, with '
            f"Python type {output_type} and Dagster type {dagster_type.display_name}"
        )

    # Only the type check itself runs inside the user-code error boundary.
    with user_code_error_boundary(
            DagsterTypeCheckError,
            _boundary_message,
            log_manager=type_check_context.log,
    ):
        type_check = do_type_check(type_check_context, dagster_type,
                                   output.value)

    # Fall back to empty description/metadata when the result is falsy.
    if type_check:
        check_description = type_check.description
        check_metadata = type_check.metadata_entries
    else:
        check_description = None
        check_metadata = []

    type_check_data = TypeCheckData(
        success=type_check.success,
        label=step_output_handle.output_name,
        description=check_description,
        metadata_entries=check_metadata,
    )
    # Anything on the output that is not a MetadataEntry is left out.
    output_metadata = [
        candidate for candidate in output.metadata_entries
        if isinstance(candidate, MetadataEntry)
    ]
    yield DagsterEvent.step_output_event(
        step_context=step_context,
        step_output_data=StepOutputData(
            step_output_handle=step_output_handle,
            type_check_data=type_check_data,
            version=version,
            metadata_entries=output_metadata,
        ),
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description=(
            f'Type check failed for step output "{output.output_name}" - '
            f'expected type "{dagster_type.display_name}". '
            f"Description: {type_check.description}"),
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
Exemple #28
0
def _get_mapped_solids_dict(
    composite,
    graph_def,
    current_stack,
    current_solid_config,
    resource_defs,
    is_using_graph_job_op_apis,
):
    """Apply a graph's config mapping and validate what it returns.

    The config mapping function receives the dictionary found at::

        solid_name:
           config: {dict_passed_to_user}

    and returns the dictionary rooted at ``solids``::

        solid_name:
           solids: {return_value_of_config_fn}

    The function is called and its return value is then validated against a
    schema constructed from the graph's child solids.

    Returns:
        The ``value`` of the config evaluation result for the mapped config.

    Raises:
        DagsterInvalidConfigError: If applying the ``@configured`` config
            mapping to the incoming config does not succeed.
    """
    # The @configured mapping is applied to the incoming config first, before
    # the composite's own config mapping function runs.
    configured_result = graph_def.apply_config_mapping(current_solid_config)
    if not configured_result.success:
        raise DagsterInvalidConfigError(
            "Error in config for composite solid {}".format(composite.name),
            configured_result.errors,
            configured_result,
        )

    boundary_message = _get_error_lambda(current_stack)
    with user_code_error_boundary(DagsterConfigMappingFunctionError,
                                  boundary_message):
        resolve_config = graph_def.config_mapping.resolve_from_validated_config
        mapped_solids_config = resolve_config(
            configured_result.value.get("config", {}))

    # The schema for the mapping function's output is built dynamically from
    # the graph's solids.
    child_schema = define_solid_dictionary_cls(
        solids=graph_def.solids,
        ignored_solids=None,
        dependency_structure=graph_def.dependency_structure,
        parent_handle=current_stack.handle,
        resource_defs=resource_defs,
        is_using_graph_job_op_apis=is_using_graph_job_op_apis,
    )

    # Evaluate the mapped config against that schema.
    validation = process_config(child_schema, mapped_solids_config)

    if not validation.success:
        raise_composite_descent_config_error(current_stack,
                                             mapped_solids_config, validation)

    return validation.value
Exemple #29
0
def _type_checked_step_output_event_sequence(step_context, output):
    """Type check a step output, emit its step-output event, then fail if needed.

    An execution plan is built from the pipeline definition, run config and
    mode so that the version of this step output can be resolved. The output
    value is then type checked against the step output's dagster type.

    Only the type check call runs inside the ``DagsterTypeCheckError``
    boundary. The event is yielded and the failure is raised after the
    boundary has exited, so the generator is never suspended inside it and an
    unrelated error while building the event is not reported as a type check
    error. This matches ``_type_check_output`` elsewhere in this file.

    Args:
        step_context: Execution context of the step producing the output.
        output: The ``Output`` yielded by user code.

    Yields:
        A single step-output event carrying the type check result.

    Raises:
        DagsterTypeCheckDidNotPass: If the type check reports failure.
    """
    # Imported here rather than at module level, as in the original block.
    from dagster.core.execution.api import create_execution_plan

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    step_output = step_context.step.step_output_named(output.output_name)
    speculative_execution_plan = create_execution_plan(
        step_context.pipeline_def,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )

    version = resolve_step_output_versions(
        speculative_execution_plan,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )[StepOutputHandle(step_context.step.key, output.output_name)]

    with user_code_error_boundary(
            DagsterTypeCheckError,
            lambda: ('In solid "{handle}" the output "{output_name}" received '
                     "value {output_value} of Python type {output_type} which "
                     "does not pass the typecheck for Dagster type "
                     "{dagster_type_name}. Step {step_key}.").format(
                         handle=str(step_context.step.solid_handle),
                         output_name=output.output_name,
                         output_value=output.value,
                         output_type=type(output.value),
                         dagster_type_name=step_output.dagster_type.name,
                         step_key=step_context.step.key,
                     ),
    ):
        type_check = _do_type_check(
            step_context.for_type(step_output.dagster_type),
            step_output.dagster_type, output.value)

    # The event is emitted whether or not the check passed.
    yield _create_step_output_event(
        step_context,
        output,
        type_check=type_check,
        success=type_check.success,
        version=version,
    )

    if not type_check.success:
        raise DagsterTypeCheckDidNotPass(
            description=
            "Type check failed for step output {output_name} of type {dagster_type}."
            .format(
                output_name=output.output_name,
                dagster_type=step_output.dagster_type.name,
            ),
            metadata_entries=type_check.metadata_entries,
            dagster_type=step_output.dagster_type,
        )
Exemple #30
0
def _input_values_from_intermediate_storage(step_context):
    """Yield ``(input name, input value)`` for each input of the current step.

    Inputs whose dagster type kind is ``NOTHING`` are skipped. For every other
    input the value comes from one of four sources, tested in this order:
    multiple upstream outputs (fan-in), a single upstream output, config
    (loaded through the dagster type's loader inside a user-code error
    boundary), or the input's default value.

    Args:
        step_context: Execution context of the step whose inputs are loaded.

    Yields:
        Tuples of the step input's name and its loaded value.
    """
    for step_input in step_context.step.step_inputs:
        if step_input.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        if step_input.is_from_multiple_outputs:
            # List/Nullable types load each element with the inner type;
            # otherwise the fan-in is typed Any and the type is used as is.
            wraps_inner_type = step_input.dagster_type.kind in (
                DagsterTypeKind.LIST, DagsterTypeKind.NULLABLE)
            element_type = (step_input.dagster_type.inner_type
                            if wraps_inner_type else step_input.dagster_type)

            input_value = []
            for source_handle in step_input.source_handles:
                # Skipped upstream outputs leave no intermediate behind.
                if not step_context.intermediate_storage.has_intermediate(
                        step_context, source_handle):
                    continue
                input_value.append(
                    step_context.intermediate_storage.get_intermediate(
                        context=step_context,
                        step_output_handle=source_handle,
                        dagster_type=element_type,
                    ))

            # With an object store-backed intermediate store the fan-in values
            # are ObjectStoreOperation instances; wrapping them lets the caller
            # yield the object store events and unpack the values.
            if all(isinstance(item, ObjectStoreOperation)
                   for item in input_value):
                input_value = MultipleStepOutputsListWrapper(input_value)

        elif step_input.is_from_single_output:
            only_handle = step_input.source_handles[0]
            input_value = step_context.intermediate_storage.get_intermediate(
                context=step_context,
                step_output_handle=only_handle,
                dagster_type=step_input.dagster_type,
            )

        elif step_input.is_from_config:
            boundary_message = _generate_error_boundary_msg_for_step_input(
                step_context, step_input)
            with user_code_error_boundary(DagsterTypeLoadingError,
                                          msg_fn=boundary_message):
                loader = step_input.dagster_type.loader
                input_value = loader.construct_from_config_value(
                    step_context, step_input.config_data)

        elif step_input.is_from_default_value:
            input_value = step_input.config_data

        else:
            check.failed("Unhandled step_input type!")

        yield step_input.name, input_value