Beispiel #1
0
def test_long_int():
    test_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class NumHolder(NamedTuple):
        num: int

    x = NumHolder(98765432109876543210)
    ser_x = _serialize_dagster_namedtuple(x, test_map)
    roundtrip_x = _deserialize_json(ser_x, test_map)
    assert x.num == roundtrip_x.num
Beispiel #2
0
def test_descent_path():
    class Foo(NamedTuple):
        bar: int

    with pytest.raises(SerializationError, match=re.escape("Descent path: <root:dict>.a.b[2].c")):
        serialize_value({"a": {"b": [{}, {}, {"c": Foo(1)}]}})

    test_map = WhitelistMap.create()
    blank_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class Fizz(NamedTuple):
        buzz: int

    ser = _serialize_dagster_namedtuple(
        {"a": {"b": [{}, {}, {"c": Fizz(1)}]}}, whitelist_map=test_map
    )

    with pytest.raises(DeserializationError, match=re.escape("Descent path: <root:dict>.a.b[2].c")):
        _deserialize_json(ser, whitelist_map=blank_map)
Beispiel #3
0
def test_enum_backcompat():
    test_env = WhitelistMap.create()

    class MyEnumSerializer(EnumSerializer):
        @classmethod
        def value_from_storage_str(cls, storage_str, klass):
            return getattr(klass, storage_str)

        @classmethod
        def value_to_storage_str(cls, value, whitelist_map, descent_path):
            val_as_str = str(value)
            actual_enum_val = val_as_str.split(".")[1:]
            backcompat_name = (
                "OldEnum"  # Simulate changing the storage name to some legacy backcompat name
            )
            return ".".join([backcompat_name, *actual_enum_val])

    @_whitelist_for_serdes(test_env, serializer=MyEnumSerializer)
    class MyEnum(Enum):
        RED = "color.red"
        BLUE = "color.red"

    # Ensure that serdes roundtrip preserves value
    register_serdes_enum_fallbacks({"OldEnum": MyEnum}, whitelist_map=test_env)

    my_enum = MyEnum("color.red")
    enum_json = serialize_value(my_enum, whitelist_map=test_env)
    result = _deserialize_json(enum_json, test_env)
    assert result == my_enum

    # ensure that "legacy" environment can correctly interpret enum stored under legacy name.
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class OldEnum(Enum):
        RED = "color.red"
        BLUE = "color.blue"

    result = _deserialize_json(enum_json, legacy_env)
    old_enum = OldEnum("color.red")
    assert old_enum == result
Beispiel #4
0
def test_persistent_tuple():
    test_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class Alphabet(namedtuple("_Alphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(Alphabet, cls).__new__(cls, a, b, c)

    foo = Alphabet(a="A", b="B", c="C")
    serialized = _serialize_dagster_namedtuple(foo, whitelist_map=test_map)
    foo_2 = _deserialize_json(serialized, whitelist_map=test_map)
    assert foo == foo_2
Beispiel #5
0
def test_pipeline_run_status_dagster_run_status():
    # serialize in current code
    test_status = DagsterRunStatus("QUEUED")
    test_str = serialize_value(test_status)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class PipelineRunStatus(Enum):
        QUEUED = "QUEUED"

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, PipelineRunStatus)
    assert result.value == test_status.value
Beispiel #6
0
def test_solid_handle_node_handle():
    # serialize in current code
    test_handle = NodeHandle("test", None)
    test_str = serialize_dagster_namedtuple(test_handle)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", "name parent")):
        pass

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, SolidHandle)
    assert result.name == test_handle.name
Beispiel #7
0
    def build_legacy_whitelist_map():
        legacy_env = WhitelistMap.create()

        @_whitelist_for_serdes(legacy_env)
        class ExternalJobOrigin(
                namedtuple("_ExternalJobOrigin",
                           "external_repository_origin job_name")):
            def get_id(self):
                return create_snapshot_id(self)

        @_whitelist_for_serdes(legacy_env)
        class ExternalRepositoryOrigin(
                namedtuple("_ExternalRepositoryOrigin",
                           "repository_location_origin repository_name")):
            def get_id(self):
                return create_snapshot_id(self)

        class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
            @classmethod
            def skip_when_empty(cls):
                return {"use_ssl"}

        @_whitelist_for_serdes(whitelist_map=legacy_env,
                               serializer=GrpcServerOriginSerializer)
        class GrpcServerRepositoryLocationOrigin(
                namedtuple(
                    "_GrpcServerRepositoryLocationOrigin",
                    "host port socket location_name use_ssl",
                ), ):
            def __new__(cls,
                        host,
                        port=None,
                        socket=None,
                        location_name=None,
                        use_ssl=None):
                return super(GrpcServerRepositoryLocationOrigin,
                             cls).__new__(cls, host, port, socket,
                                          location_name, use_ssl)

        return (
            legacy_env,
            ExternalJobOrigin,
            ExternalRepositoryOrigin,
            GrpcServerRepositoryLocationOrigin,
        )
Beispiel #8
0
def test_namedtuple_name_map():

    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Thing(NamedTuple):
        name: str

    wmap.register_serialized_name("Thing", "SerializedThing")
    thing = Thing("foo")

    thing_serialized = _serialize_dagster_namedtuple(thing, wmap)
    assert seven.json.loads(thing_serialized)["__class__"] == "SerializedThing"

    with pytest.raises(DeserializationError):
        _deserialize_json(thing_serialized, wmap)

    wmap.register_deserialized_name("SerializedThing", "Thing")
    assert _deserialize_json(thing_serialized, wmap) == thing
Beispiel #9
0
def test_serdes_enum_backcompat():
    test_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class Corge(Enum):
        FOO = 1
        BAR = 2

    assert test_map.has_enum_entry("Corge")

    corge = Corge.FOO

    packed = pack_inner_value(corge, whitelist_map=test_map, descent_path="")

    class CorgeBackCompatSerializer(DefaultEnumSerializer):
        @classmethod
        def value_from_storage_str(cls, storage_str, klass):
            if storage_str == "FOO":
                value = "FOO_FOO"
            else:
                value = storage_str

            return super().value_from_storage_str(value, klass)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=test_map,
                           serializer=CorgeBackCompatSerializer)
    class Corge(Enum):
        BAR = 2
        BAZ = 3
        FOO_FOO = 4

    unpacked = unpack_inner_value(packed,
                                  whitelist_map=test_map,
                                  descent_path="")

    assert unpacked != corge
    assert unpacked == Corge.FOO_FOO
Beispiel #10
0
def test_backward_compat_serdes():
    test_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz):
            return super(Quux, cls).__new__(cls, foo, bar, baz)  # pylint: disable=bad-super-call

    quux = Quux("zip", "zow", "whoopie")

    serialized = _serialize_dagster_namedtuple(quux, whitelist_map=test_map)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=test_map)
    class Quux(namedtuple("_Quux", "foo bar")):  # pylint: disable=bad-super-call
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)

    deserialized = _deserialize_json(serialized, whitelist_map=test_map)

    assert deserialized != quux
    assert deserialized.foo == quux.foo
    assert deserialized.bar == quux.bar
    assert not hasattr(deserialized, "baz")
Beispiel #11
0
def test_namedtuple_backcompat():
    old_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=old_map)
    class OldThing(NamedTuple):
        old_name: str

        def get_id(self):
            json_rep = _serialize_dagster_namedtuple(self,
                                                     whitelist_map=old_map)
            return hash_str(json_rep)

    # create the old things
    old_thing = OldThing("thing")
    old_thing_id = old_thing.get_id()
    old_thing_serialized = _serialize_dagster_namedtuple(old_thing, old_map)

    new_map = WhitelistMap.create()

    class ThingSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(cls, storage_dict, klass, args_for_class,
                                    whitelist_map, descent_path):
            raw_dict = {
                key: unpack_inner_value(value, whitelist_map,
                                        f"{descent_path}.{key}")
                for key, value in storage_dict.items()
            }
            # typical pattern is to use the same serialization format from an old field and passing
            # it in as a new field
            return klass(
                **{
                    key: value
                    for key, value in raw_dict.items() if key in args_for_class
                },
                new_name=raw_dict.get("old_name"),
            )

        @classmethod
        def value_to_storage_dict(
            cls,
            value,
            whitelist_map,
            descent_path,
        ):
            storage = super().value_to_storage_dict(value, whitelist_map,
                                                    descent_path)
            name = storage.get("new_name") or storage.get("old_name")
            if "new_name" in storage:
                del storage["new_name"]
            # typical pattern is to use the same serialization format
            # it in as a new field
            storage["old_name"] = name
            storage["__class__"] = "OldThing"  # persist using old class name
            return storage

    @_whitelist_for_serdes(whitelist_map=new_map, serializer=ThingSerializer)
    class NewThing(NamedTuple):
        new_name: str

        def get_id(self):
            json_rep = _serialize_dagster_namedtuple(self,
                                                     whitelist_map=new_map)
            return hash_str(json_rep)

    # exercising the old serialization format
    register_serdes_tuple_fallbacks({"OldThing": NewThing},
                                    whitelist_map=new_map)

    new_thing = NewThing("thing")
    new_thing_id = new_thing.get_id()
    new_thing_serialized = _serialize_dagster_namedtuple(new_thing, new_map)

    assert new_thing_id == old_thing_id
    assert new_thing_serialized == old_thing_serialized

    # ensure that the new serializer can correctly interpret old serialized data
    old_thing_deserialized = _deserialize_json(old_thing_serialized, new_map)
    assert isinstance(old_thing_deserialized, NewThing)
    assert old_thing_deserialized.get_id() == new_thing_id

    # ensure that the new things serialized can still be read by old code
    new_thing_deserialized = _deserialize_json(new_thing_serialized, old_map)
    assert isinstance(new_thing_deserialized, OldThing)
    assert new_thing_deserialized.get_id() == old_thing_id
Beispiel #12
0
def serdes_test_class(klass):
    test_map = WhitelistMap.create()

    return _whitelist_for_serdes(whitelist_map=test_map)(klass)
Beispiel #13
0
def test_legacy_event_log_load():
    # ensure EventLogEntry 0.14.3+ can still be loaded by older dagster versions
    # to avoid downgrades etc from creating operational issues
    legacy_env = WhitelistMap.create()

    # snapshot of EventLogEntry pre commit ea19544
    @_whitelist_for_serdes(
        whitelist_map=legacy_env,
        storage_name="EventLogEntry",  # use this to avoid collision with current EventLogEntry
    )
    class OldEventLogEntry(  # pylint: disable=unused-variable
        NamedTuple(
            "_OldEventLogEntry",
            [
                ("error_info", Optional[SerializableErrorInfo]),
                ("message", str),
                ("level", Union[str, int]),
                ("user_message", str),
                ("run_id", str),
                ("timestamp", float),
                ("step_key", Optional[str]),
                ("pipeline_name", Optional[str]),
                ("dagster_event", Optional[DagsterEvent]),
            ],
        )
    ):
        def __new__(
            cls,
            error_info,
            message,
            level,
            user_message,
            run_id,
            timestamp,
            step_key=None,
            pipeline_name=None,
            dagster_event=None,
            job_name=None,
        ):
            pipeline_name = pipeline_name or job_name
            return super().__new__(
                cls,
                check.opt_inst_param(error_info, "error_info", SerializableErrorInfo),
                check.str_param(message, "message"),
                level,  # coerce_valid_log_level call omitted
                check.str_param(user_message, "user_message"),
                check.str_param(run_id, "run_id"),
                check.float_param(timestamp, "timestamp"),
                check.opt_str_param(step_key, "step_key"),
                check.opt_str_param(pipeline_name, "pipeline_name"),
                check.opt_inst_param(dagster_event, "dagster_event", DagsterEvent),
            )

    # current event log entry
    new_event = EventLogEntry(
        user_message="test 1 2 3",
        error_info=None,
        level="debug",
        run_id="fake_run_id",
        timestamp=time.time(),
    )

    storage_str = serialize_dagster_namedtuple(new_event)

    result = _deserialize_json(storage_str, legacy_env)
    assert result.message is not None