Example #1
0
def test_parse_invalid_metadata():

    metadata = {"foo": object()}

    with pytest.raises(DagsterInvalidMetadata) as exc_info:
        normalize_metadata(metadata, [])

    entries = normalize_metadata(metadata, [], allow_invalid=True)
    assert len(entries) == 1
    assert entries[0].label == "foo"
    assert entries[0].entry_data == TextMetadataValue("[object] (unserializable)")
Example #2
0
def _execute_summary_stats(type_name, value, event_metadata_fn):
    if not event_metadata_fn:
        return []

    metadata_or_metadata_entries = event_metadata_fn(value)

    invalid_message = (
        "The return value of the user-defined summary_statistics function for pandas "
        f"data frame type {type_name} returned {value}. This function must return "
        "Union[Dict[str, Union[str, float, int, Dict, MetadataValue]], List[MetadataEntry]]"
    )

    metadata = None
    metadata_entries = None

    if isinstance(metadata_or_metadata_entries, list):
        metadata_entries = metadata_or_metadata_entries
    elif isinstance(metadata_or_metadata_entries, dict):
        metadata = metadata_or_metadata_entries
    else:
        raise DagsterInvariantViolationError(invalid_message)

    try:
        return normalize_metadata(metadata, metadata_entries)
    except (DagsterInvalidMetadata, CheckError):
        raise DagsterInvariantViolationError(invalid_message)
Example #3
0
    def add_input_metadata(
        self,
        metadata: Dict[str, Any],
        description: Optional[str] = None,
    ) -> None:
        """Accepts a dictionary of metadata. Metadata entries will appear on the LOADED_INPUT event.
        If the input is an asset, metadata will be attached to an asset observation.

        The asset observation will be yielded from the run and appear in the event log.
        Only valid if the context has an asset key.
        """
        from dagster.core.definitions.metadata import normalize_metadata
        from dagster.core.events import DagsterEvent

        metadata = check.dict_param(metadata, "metadata", key_type=str)
        self._metadata_entries.extend(normalize_metadata(metadata, []))
        if self.asset_key:
            check.opt_str_param(description, "description")

            observation = AssetObservation(
                asset_key=self.asset_key,
                description=description,
                partition=self.asset_partition_key
                if self.has_asset_partitions else None,
                metadata=metadata,
            )
            self._observations.append(observation)
            if self._step_context:
                self._events.append(
                    DagsterEvent.asset_observation(self._step_context,
                                                   observation))
Example #4
0
def test_parse_path_metadata():

    metadata = {"path": Path("/a/b.csv")}

    entries = normalize_metadata(metadata, [])
    assert len(entries) == 1
    assert entries[0].label == "path"
    assert entries[0].entry_data == PathMetadataValue("/a/b.csv")
Example #5
0
    def __init__(
        self,
        name=None,
        dagster_type=None,
        description=None,
        default_value=NoValueSentinel,
        root_manager_key=None,
        metadata=None,
        asset_key=None,
        asset_partitions=None,
        # when adding new params, make sure to update combine_with_inferred below
    ):
        self._name = check_valid_name(name) if name else None

        self._type_not_set = dagster_type is None
        self._dagster_type = check.inst(resolve_dagster_type(dagster_type), DagsterType)

        self._description = check.opt_str_param(description, "description")

        self._default_value = _check_default_value(self._name, self._dagster_type, default_value)

        if root_manager_key:
            experimental_arg_warning("root_manager_key", "InputDefinition.__init__")

        self._root_manager_key = check.opt_str_param(root_manager_key, "root_manager_key")

        self._metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
        self._metadata_entries = check.is_list(
            normalize_metadata(self._metadata, [], allow_invalid=True), MetadataEntry
        )

        if asset_key:
            experimental_arg_warning("asset_key", "InputDefinition.__init__")

        if not callable(asset_key):
            check.opt_inst_param(asset_key, "asset_key", AssetKey)

        self._asset_key = asset_key

        if asset_partitions:
            experimental_arg_warning("asset_partitions", "InputDefinition.__init__")
            check.param_invariant(
                asset_key is not None,
                "asset_partitions",
                'Cannot specify "asset_partitions" argument without also specifying "asset_key"',
            )
        if callable(asset_partitions):
            self._asset_partitions_fn = asset_partitions
        elif asset_partitions is not None:
            asset_partitions = check.opt_set_param(asset_partitions, "asset_partitions", str)
            self._asset_partitions_fn = lambda _: asset_partitions
        else:
            self._asset_partitions_fn = None
Example #6
0
    def add_output_metadata(self, metadata: Dict[str, Any]) -> None:
        """Add a dictionary of metadata to the handled output.

        Metadata entries added will show up in the HANDLED_OUTPUT and ASSET_MATERIALIZATION events for the run.

        Args:
            metadata (Dict[str, Any]): A metadata dictionary to log

        Examples:

        .. code-block:: python

            from dagster import IOManager

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.add_output_metadata({"foo": "bar"})
        """
        from dagster.core.definitions.metadata import normalize_metadata

        self._metadata_entries = normalize_metadata(metadata, [])
Example #7
0
    def __new__(
        cls,
        key: AssetKey,
        metadata: Optional[MetadataUserInput] = None,
        io_manager_key: str = "io_manager",
        description: Optional[str] = None,
        partitions_def: Optional[PartitionsDefinition] = None,
    ):

        metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
        metadata_entries = normalize_metadata(metadata, [], allow_invalid=True)

        return super().__new__(
            cls,
            key=check.inst_param(key, "key", AssetKey),
            metadata_entries=metadata_entries,
            io_manager_key=check.str_param(io_manager_key, "io_manager_key"),
            description=check.opt_str_param(description, "description"),
            partitions_def=check.opt_inst_param(
                partitions_def, "partitions_def", PartitionsDefinition
            ),
        )
Example #8
0
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = list([output_def.name for output_def in step.step_outputs])

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        step_output = step.step_output_named(cast(str, output.output_name))
        output_def = step_context.pipeline_def.get_solid(step_output.solid_handle).output_def_named(
            step_output.name
        )

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times"
                )

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output."
                )

            step_context.observe_output(output.output_name)

            metadata = step_context.get_output_metadata(output.output_name)
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
            )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )
            if step_context.has_seen_output(output.output_name, output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.'
                )
            step_context.observe_output(output.output_name, output.mapping_key)
            metadata = step_context.get_output_metadata(
                output.output_name, mapping_key=output.mapping_key
            )
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
                mapping_key=output.mapping_key,
            )

        yield output

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(step_output.name)
        if not step_context.has_seen_output(step_output_def.name) and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                step_context.log.info(
                    f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
                )
                yield Output(output_name=step_output_def.name, value=None)
            elif not step_output_def.is_dynamic:
                raise DagsterStepOutputNotFoundError(
                    (
                        f"Core compute for {op_label} did not return an output for non-optional "
                        f'output "{step_output_def.name}"'
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
Example #9
0
    def __init__(
        self,
        dagster_type=None,
        name=None,
        description=None,
        is_required=None,
        io_manager_key=None,
        metadata=None,
        asset_key=None,
        asset_partitions=None,
        asset_partitions_def=None
        # make sure new parameters are updated in combine_with_inferred below
    ):
        from dagster.core.definitions.partition import PartitionsDefinition

        self._name = (check_valid_name(
            check.opt_str_param(name, "name", DEFAULT_OUTPUT))
                      if name is not NoNameSentinel else None)
        self._type_not_set = dagster_type is None
        self._dagster_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, "description")
        self._is_required = check.opt_bool_param(is_required,
                                                 "is_required",
                                                 default=True)
        self._manager_key = check.opt_str_param(io_manager_key,
                                                "io_manager_key",
                                                default="io_manager")
        self._metadata = check.opt_dict_param(metadata,
                                              "metadata",
                                              key_type=str)
        self._metadata_entries = check.is_list(
            normalize_metadata(self._metadata, [], allow_invalid=True),
            MetadataEntry)

        if asset_key:
            experimental_arg_warning("asset_key", "OutputDefinition.__init__")

        if callable(asset_key):
            warnings.warn(
                "Passing a function as the `asset_key` argument to `Out` or `OutputDefinition` is "
                "deprecated behavior and will be removed in version 0.15.0.")
        else:
            check.opt_inst_param(asset_key, "asset_key", AssetKey)

        self._asset_key = asset_key

        if asset_partitions:
            experimental_arg_warning("asset_partitions",
                                     "OutputDefinition.__init__")
            check.param_invariant(
                asset_key is not None,
                "asset_partitions",
                'Cannot specify "asset_partitions" argument without also specifying "asset_key"',
            )

        if callable(asset_partitions):
            self._asset_partitions_fn = asset_partitions
        elif asset_partitions is not None:
            asset_partitions = check.opt_set_param(asset_partitions,
                                                   "asset_partitions", str)
            self._asset_partitions_fn = lambda _: asset_partitions
        else:
            self._asset_partitions_fn = None

        if asset_partitions_def:
            experimental_arg_warning("asset_partitions_def",
                                     "OutputDefinition.__init__")
        self._asset_partitions_def = check.opt_inst_param(
            asset_partitions_def, "asset_partition_def", PartitionsDefinition)
Example #10
0
    def __init__(
        self,
        type_check_fn: TypeCheckFn,
        key: t.Optional[str] = None,
        name: t.Optional[str] = None,
        is_builtin: bool = False,
        description: t.Optional[str] = None,
        loader: t.Optional[DagsterTypeLoader] = None,
        materializer: t.Optional[DagsterTypeMaterializer] = None,
        required_resource_keys: t.Set[str] = None,
        kind: DagsterTypeKind = DagsterTypeKind.REGULAR,
        typing_type: t.Any = None,
        metadata_entries: t.Optional[t.List[MetadataEntry]] = None,
        metadata: t.Optional[t.Dict[str, RawMetadataValue]] = None,
    ):
        check.opt_str_param(key, "key")
        check.opt_str_param(name, "name")

        check.invariant(not (name is None and key is None), "Must set key or name")
        if name is None:
            key = check.not_none(
                key,
                "If name is not provided, must provide key.",
            )
            self.key, self._name = key, None
        elif key is None:
            name = check.not_none(
                name,
                "If key is not provided, must provide name.",
            )
            self.key, self._name = name, name
        else:
            check.invariant(key and name)
            self.key, self._name = key, name

        self.description = check.opt_str_param(description, "description")
        self.loader = check.opt_inst_param(loader, "loader", DagsterTypeLoader)
        self.materializer = check.opt_inst_param(
            materializer, "materializer", DagsterTypeMaterializer
        )

        self.required_resource_keys = check.opt_set_param(
            required_resource_keys,
            "required_resource_keys",
        )

        self._type_check_fn = check.callable_param(type_check_fn, "type_check_fn")
        _validate_type_check_fn(self._type_check_fn, self._name)

        self.is_builtin = check.bool_param(is_builtin, "is_builtin")
        check.invariant(
            self.display_name is not None,
            "All types must have a valid display name, got None for key {}".format(key),
        )

        self.kind = check.inst_param(kind, "kind", DagsterTypeKind)

        self.typing_type = typing_type

        metadata_entries = check.opt_list_param(
            metadata_entries, "metadata_entries", of_type=MetadataEntry
        )
        metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
        self._metadata_entries = normalize_metadata(metadata, metadata_entries)
Example #11
0
    def __init__(
        self,
        dagster_type=None,
        name: Optional[str] = None,
        description: Optional[str] = None,
        is_required: bool = True,
        io_manager_key: Optional[str] = None,
        metadata: Optional[MetadataUserInput] = None,
        asset_key: Optional[Union[AssetKey, DynamicAssetKey]] = None,
        asset_partitions: Optional[Union[AbstractSet[str],
                                         Callable[["OutputContext"],
                                                  AbstractSet[str]]]] = None,
        asset_partitions_def: Optional["PartitionsDefinition"] = None
        # make sure new parameters are updated in combine_with_inferred below
    ):
        from dagster.core.definitions.partition import PartitionsDefinition

        self._name = check_valid_name(
            check.opt_str_param(name, "name", DEFAULT_OUTPUT))
        self._type_not_set = dagster_type is None
        self._dagster_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, "description")
        self._is_required = check.bool_param(is_required, "is_required")
        self._io_manager_key = check.opt_str_param(
            io_manager_key,
            "io_manager_key",
            default="io_manager",
        )
        self._metadata = check.opt_dict_param(metadata,
                                              "metadata",
                                              key_type=str)
        self._metadata_entries = check.is_list(
            normalize_metadata(self._metadata, [], allow_invalid=True),
            MetadataEntry)

        if asset_key:
            experimental_arg_warning("asset_key", "OutputDefinition.__init__")

        if callable(asset_key):
            warnings.warn(
                "Passing a function as the `asset_key` argument to `Out` or `OutputDefinition` is "
                "deprecated behavior and will be removed in version 0.15.0.")
        else:
            check.opt_inst_param(asset_key, "asset_key", AssetKey)

        self._asset_key = asset_key

        if asset_partitions:
            experimental_arg_warning("asset_partitions",
                                     "OutputDefinition.__init__")
            check.param_invariant(
                asset_key is not None,
                "asset_partitions",
                'Cannot specify "asset_partitions" argument without also specifying "asset_key"',
            )

        self._asset_partitions_fn: Optional[Callable[["OutputContext"],
                                                     AbstractSet[str]]]
        if callable(asset_partitions):
            self._asset_partitions_fn = asset_partitions
        elif asset_partitions is not None:
            asset_partitions = check.opt_set_param(asset_partitions,
                                                   "asset_partitions", str)

            def _fn(_context: "OutputContext") -> AbstractSet[str]:
                return cast(AbstractSet[str], asset_partitions)  # mypy bug?

            self._asset_partitions_fn = _fn
        else:
            self._asset_partitions_fn = None

        if asset_partitions_def:
            experimental_arg_warning("asset_partitions_def",
                                     "OutputDefinition.__init__")
        self._asset_partitions_def = check.opt_inst_param(
            asset_partitions_def, "asset_partition_def", PartitionsDefinition)