Ejemplo n.º 1
0
def _create_type_materializations(step_context: StepExecutionContext,
                                  output_name: str,
                                  value: Any) -> Iterator[DagsterEvent]:
    """Yield materialization events for the type materializers configured on an output.

    Starting at the step's solid handle and moving up through each parent
    handle, the run config for that handle is inspected for type materializer
    specs that name ``output_name``. For every match, the materializer of the
    output's dagster type is invoked with the spec's config and ``value``, and
    one asset materialization event is yielded per returned item.

    Raises:
        DagsterInvariantViolationError: If the materializer returns an item
            that is neither an AssetMaterialization nor a Materialization.
    """

    def _error_message() -> str:
        # Built lazily: the error boundary only calls this when it needs it.
        return ("Error occurred during output materialization:"
                f'\n    output name: "{output_name}"'
                f'\n    solid invocation: "{step_context.solid.name}"'
                f'\n    solid definition: "{step_context.solid_def.name}"')

    step = step_context.step
    handle = step.solid_handle

    # Output mappings may be configured at any level of the composition
    # hierarchy, so every ancestor handle is visited.
    while handle:
        handle_key = handle.to_string()
        config_for_handle = step_context.resolved_run_config.solids.get(
            handle_key)
        handle = handle.parent

        if config_for_handle is None:
            continue

        for spec_by_output in config_for_handle.outputs.type_materializer_specs:
            # Each spec is a single-entry mapping: {output name: config}.
            check.invariant(len(spec_by_output) == 1)
            configured_name, materializer_config = next(
                iter(spec_by_output.items()))
            if configured_name != output_name:
                continue

            step_output = step.step_output_named(output_name)
            with user_code_error_boundary(
                    DagsterTypeMaterializationError,
                    msg_fn=_error_message,
                    log_manager=step_context.log,
            ):
                output_def = step_context.solid_def.output_def_named(
                    step_output.name)
                dagster_type = output_def.dagster_type
                materializer = dagster_type.materializer
                if materializer is None:
                    check.failed(
                        "Unexpected attempt to materialize with no materializer available on dagster_type"
                    )
                produced = materializer.materialize_runtime_values(
                    step_context, materializer_config, value)

            accepted_types = (AssetMaterialization, Materialization)
            for item in produced:
                if not isinstance(item, accepted_types):
                    raise DagsterInvariantViolationError((
                        "materialize_runtime_values on type {type_name} has returned "
                        "value {value} of type {python_type}. You must return an "
                        "AssetMaterialization.").format(
                            type_name=dagster_type.display_name,
                            value=repr(item),
                            python_type=type(item).__name__,
                        ))

                yield DagsterEvent.asset_materialization(step_context, item)
Ejemplo n.º 2
0
    def log_event(
        self, event: Union[AssetObservation, AssetMaterialization,
                           Materialization]
    ) -> None:
        """Record an AssetMaterialization or AssetObservation raised inside an io manager's `handle_output`.

        Events recorded through this method will appear in the event log.

        Args:
            event (Union[AssetMaterialization, Materialization, AssetObservation]): The event to log.

        Examples:

        .. code-block:: python

            from dagster import IOManager, AssetMaterialization

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.log_event(AssetMaterialization("foo"))
        """
        from dagster.core.events import DagsterEvent

        step_context = self._step_context

        # A DagsterEvent is only built when a step context is available; the
        # raw user event is remembered either way.
        if isinstance(event, (AssetMaterialization, Materialization)):
            if step_context:
                lineage = step_context.get_input_lineage()
                self._events.append(
                    DagsterEvent.asset_materialization(
                        step_context, event, lineage))
        elif isinstance(event, AssetObservation):
            if step_context:
                self._events.append(
                    DagsterEvent.asset_observation(step_context, event))
        else:
            check.failed("Unexpected event {event}".format(event=event))

        self._user_events.append(event)
Ejemplo n.º 3
0
    def log_event(self, event: UserEvent) -> None:
        """Record an AssetMaterialization, AssetObservation, or ExpectationResult raised inside an op body.

        Events recorded through this method will appear in the list of DagsterEvents, as well as the event log.

        Args:
            event (Union[AssetMaterialization, Materialization, AssetObservation, ExpectationResult]): The event to log.

        **Examples:**

        .. code-block:: python

            from dagster import op, AssetMaterialization

            @op
            def log_materialization(context):
                context.log_event(AssetMaterialization("foo"))
        """

        step_context = self._step_execution_context

        # Translate the user-facing event into its DagsterEvent counterpart.
        if isinstance(event, (AssetMaterialization, Materialization)):
            dagster_event = DagsterEvent.asset_materialization(
                step_context,
                event,
                step_context.get_input_lineage(),
            )
        elif isinstance(event, AssetObservation):
            dagster_event = DagsterEvent.asset_observation(step_context, event)
        elif isinstance(event, ExpectationResult):
            dagster_event = DagsterEvent.step_expectation_result(
                step_context, event)
        else:
            check.failed("Unexpected event {event}".format(event=event))

        self._events.append(dagster_event)
Ejemplo n.º 4
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Pass an output value to its IO manager and yield the resulting events.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output_handle: Handle identifying which output is being stored.
        output: The output object whose ``value`` is given to the IO manager.
        input_lineage: Lineage info attached to every materialization event.

    Yields:
        An asset materialization event for each ``AssetMaterialization`` the
        manager returned, then one for each materialization derived from the
        output's asset key (when it has one), and finally a handled-output
        event.

    Raises:
        DagsterInvariantViolationError: If the manager returns anything other
            than an AssetMaterialization, EventMetadataEntry or
            PartitionMetadataEntry.
    """

    output_def = step_context.solid_def.output_def_named(
        step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    # NOTE(review): only the call itself is inside the error boundary. If
    # handle_output is a generator function, its body runs while the result is
    # iterated below, i.e. outside this boundary — confirm that is intended.
    with solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda:
        (f'Error occurred while handling output "{output_context.name}" of '
         f'step "{step_context.step.key}":'),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        handle_output_res = output_manager.handle_output(
            output_context, output.value)

    manager_materializations = []
    manager_metadata_entries = []
    if handle_output_res is not None:
        # Sort whatever the manager returned/yielded into materializations
        # and metadata entries; anything else is a contract violation.
        for elt in ensure_gen(handle_output_res):
            if isinstance(elt, AssetMaterialization):
                manager_materializations.append(elt)
            elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
                experimental_functionality_warning(
                    "Yielding metadata from an IOManager's handle_output() function"
                )
                manager_metadata_entries.append(elt)
            else:
                raise DagsterInvariantViolationError(
                    f"IO manager on output {output_def.name} has returned "
                    f"value {elt} of type {type(elt).__name__}. The return type can only be "
                    "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
                )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization,
                                                 input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager)
    if asset_key:
        for materialization in _get_output_asset_materializations(
                asset_key,
                partitions,
                output,
                output_def,
                manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context,
                                                     materialization,
                                                     input_lineage)

    # This event reports a handled *output*; the override text previously
    # said "Handled input", which mislabelled the event in the log.
    message_override = (
        f'Handled output "{step_output_handle.output_name}" using intermediate storage'
        if isinstance(output_manager, IntermediateStorageAdapter) else None)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=message_override,
        # Only EventMetadataEntry items are attached to the event;
        # PartitionMetadataEntry items are filtered out here.
        metadata_entries=[
            entry for entry in manager_metadata_entries
            if isinstance(entry, EventMetadataEntry)
        ],
    )
Ejemplo n.º 5
0
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext, ) -> Iterator[DagsterEvent]:
    """
    Run the step described by step_context and yield its DagsterEvents.

    The sequence opens with a start (or restart) event, continues with the
    events produced while loading and type-checking inputs, then the events
    derived from the user compute function, and closes with a step-success
    event carrying the measured duration. Exceptions that bubble up during
    the computation of the step are not caught here.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    is_retry = step_context.previous_attempt_count > 0
    if is_retry:
        yield DagsterEvent.step_restarted_event(
            step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    loaded_inputs = {}
    collected_lineage = []

    for step_input in step_context.step.step_inputs:
        source = step_input.source
        input_def = source.get_input_def(step_context.pipeline_def)

        # Inputs of kind NOTHING carry no value, so there is nothing to load.
        if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        collected_lineage.extend(source.get_asset_lineage(step_context))

        # Loading may interleave DagsterEvents with the actual input value.
        for loaded in ensure_gen(source.load_input_object(step_context)):
            if isinstance(loaded, DagsterEvent):
                yield loaded
                continue
            check.invariant(step_input.name not in loaded_inputs)
            loaded_inputs[step_input.name] = loaded

    for name, loaded_value in loaded_inputs.items():
        yield from check.generator(
            _type_checked_event_sequence_for_input(step_context, name,
                                                   loaded_value))

    collected_lineage = _dedup_asset_lineage(collected_lineage)
    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context,
                                                     loaded_inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, raw_user_events))

        # The loop must stay inside the timer block so that the time spent
        # in user code is included in the recorded duration.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                yield from _type_check_and_store_output(
                    step_context, user_event, collected_lineage)
            # Manually yielded AssetMaterializations are passed through as-is
            # for now; they may be folded into the output path later.
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(
                    step_context, user_event, collected_lineage)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
Ejemplo n.º 6
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Run the IO manager's ``handle_output`` for one output and yield the resulting events.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output_handle: Handle identifying which output is being stored.
        output: The output object whose ``value`` is given to the IO manager.
        input_lineage: Lineage info attached to every materialization event.

    Yields:
        Events consumed from the output context and DagsterEvents yielded by
        the manager, as they become available; then an asset materialization
        event per ``AssetMaterialization`` the manager produced, one per
        materialization derived from the output's asset key (when it has
        one), and finally a handled-output event.

    Raises:
        DagsterInvariantViolationError: If the manager yields an unsupported
            type, or if a yielded materialization already carries metadata
            entries while metadata entries were also collected during the
            same ``handle_output`` call.
    """

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries: List[Union[PartitionMetadataEntry, MetadataEntry]] = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.

    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            # The call happens lazily, on first iteration of this generator.
            gen_output = output_manager.handle_output(output_context, output.value)
            # Surface whatever the output context has queued during the call
            # before the (optional) return value.
            for event in output_context.consume_events():
                yield event
            # A falsy return value (e.g. None) produces no element.
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    # Each step of the manager generator is advanced inside a fresh error
    # boundary created by the lambda below.
    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        # Drain events queued on the output context since the previous
        # element, so they are emitted before the element itself is handled.
        for event in output_context.consume_events():
            yield event

        manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
        if isinstance(elt, DagsterEvent):
            yield elt
        elif isinstance(elt, AssetMaterialization):
            # Held back: emitted after the generator is exhausted (see below).
            manager_materializations.append(elt)
        elif isinstance(elt, (MetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, MetadataEntry, PartitionMetadataEntry."
            )

    # Final drain: picks up anything queued after the last element, and
    # covers the case where the generator produced no elements at all.
    for event in output_context.consume_events():
        yield event

    manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        # Metadata may come from the materialization or from the collected
        # entries, but not both.
        if materialization.metadata_entries and manager_metadata_entries:
            raise DagsterInvariantViolationError(
                f"When handling output '{output_context.name}' of {output_context.solid_def.node_type_str} '{output_context.solid_def.name}', received a materialization with metadata, while context.add_output_metadata was used within the same call to handle_output. Due to potential conflicts, this is not allowed. Please specify metadata in one place within the `handle_output` function."
            )
        if manager_metadata_entries:
            # Rebuild the materialization so it carries the collected entries.
            materialization = AssetMaterialization(
                asset_key=materialization.asset_key,
                description=materialization.description,
                metadata_entries=manager_metadata_entries,
                partition=materialization.partition,
                tags=materialization.tags,
                metadata=None,
            )
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        # Only MetadataEntry items are attached; PartitionMetadataEntry items
        # are filtered out here.
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, MetadataEntry)
        ],
    )
Ejemplo n.º 7
0
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """
    Run the step described by step_context and yield its DagsterEvents.

    The sequence opens with a start (or restart) event, continues with the
    events produced while loading and type-checking inputs, then the events
    derived from the compute function, and closes with a step-success event
    carrying the measured duration. Exceptions that bubble up during the
    computation of the step are not caught here.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    is_retry = step_context.previous_attempt_count > 0
    if is_retry:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    loaded_inputs = {}

    for step_input in step_context.step.step_inputs:
        source = step_input.source
        input_def = source.get_input_def(step_context.pipeline_def)

        # Inputs of kind NOTHING carry no value, so there is nothing to load.
        if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may interleave DagsterEvents with the actual input value.
        for loaded in ensure_gen(source.load_input_object(step_context)):
            if isinstance(loaded, DagsterEvent):
                yield loaded
                continue
            check.invariant(step_input.name not in loaded_inputs)
            loaded_inputs[step_input.name] = loaded

    for name, loaded_value in loaded_inputs.items():
        yield from check.generator(
            _type_checked_event_sequence_for_input(step_context, name, loaded_value)
        )

    lineage = step_context.get_input_lineage()

    # The execution loop below needs a compute generator that accepts a context plus a dict of
    # inputs and yields output events. Definitions built with the @solid / @lambda_solid
    # decorators hold a DecoratedSolidFunction and must be wrapped into that shape; definitions
    # constructed directly are expected to supply a compute_fn that already has it.
    solid_def = step_context.solid_def
    if isinstance(solid_def.compute_fn, DecoratedSolidFunction):
        core_gen = create_solid_compute_wrapper(solid_def)
    else:
        core_gen = solid_def.compute_fn

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(execute_core_compute(step_context, loaded_inputs, core_gen))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # The loop must stay inside the timer block so that the time spent
        # in user code is included in the recorded duration.
        for user_event in checked_user_events:
            if isinstance(user_event, DagsterEvent):
                yield user_event
            elif isinstance(user_event, (Output, DynamicOutput)):
                yield from _type_check_and_store_output(step_context, user_event, lineage)
            # Manually yielded AssetMaterializations are passed through as-is
            # for now; they may be folded into the output path later.
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, user_event, lineage)
            elif isinstance(user_event, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
Ejemplo n.º 8
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Run the IO manager's ``handle_output`` for one output and yield the resulting events.

    Yields an asset materialization event per ``AssetMaterialization`` the
    manager produced, then one per materialization derived from the output's
    asset key (when it has one), and finally a handled-output event.

    Raises:
        DagsterInvariantViolationError: If the manager produces anything other
            than an AssetMaterialization, EventMetadataEntry or
            PartitionMetadataEntry.
    """

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    io_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    yielded_materializations = []
    yielded_metadata = []

    # handle_output may be a generator function or a plain function that may
    # or may not return something. A plain function is wrapped in a generator
    # so that its call is deferred until iteration, where errors raised by it
    # are caught within iterate_with_context.
    if inspect.isgeneratorfunction(io_manager.handle_output):
        manager_results = io_manager.handle_output(output_context, output.value)
    else:

        def _deferred_call():
            returned = io_manager.handle_output(output_context, output.value)
            if returned:
                yield returned

        manager_results = _deferred_call()

    def _error_boundary():
        return solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        )

    for elt in iterate_with_context(_error_boundary, manager_results):
        if isinstance(elt, AssetMaterialization):
            yielded_materializations.append(elt)
            continue

        if isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            yielded_metadata.append(elt)
            continue

        raise DagsterInvariantViolationError(
            f"IO manager on output {output_def.name} has returned "
            f"value {elt} of type {type(elt).__name__}. The return type can only be "
            "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
        )

    # Materializations created explicitly by the manager are emitted untouched.
    for explicit in yielded_materializations:
        yield DagsterEvent.asset_materialization(step_context, explicit, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, io_manager
    )
    if asset_key:
        derived_materializations = _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            yielded_metadata,
        )
        for derived in derived_materializations:
            yield DagsterEvent.asset_materialization(step_context, derived, input_lineage)

    # Only EventMetadataEntry items are attached to the handled-output event.
    event_metadata = [item for item in yielded_metadata if isinstance(item, EventMetadataEntry)]
    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=event_metadata,
    )