Exemple #1
0
def test_from_storage_dict():
    """Check that a custom ``value_from_storage_dict`` can bridge a field rename.

    ``MyThing`` is first registered with an ``orig_name`` field and serialized.
    It is then re-registered, under a separate whitelist map, with a
    ``new_name`` field and a serializer that accepts either key. Both the old
    payload and a freshly written one must load with ``new_name`` populated.
    """
    legacy_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=legacy_map)
    class MyThing(NamedTuple):
        orig_name: str

    legacy_payload = _serialize_dagster_namedtuple(
        MyThing("old"), whitelist_map=legacy_map
    )

    class CompatSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(cls, storage_dict, klass, args_for_class,
                                    whitelist_map, descent_path):
            # simplified demo of field renaming: take whichever key is present
            name = storage_dict.get("orig_name") or storage_dict.get("new_name")
            return klass(name)

    current_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=current_map, serializer=CompatSerializer)  # pylint: disable=function-redefined
    class MyThing(NamedTuple):
        new_name: str

    # Payload written with the old field name.
    from_legacy = _deserialize_json(legacy_payload, whitelist_map=current_map)
    assert from_legacy.new_name == "old"

    # Payload written with the new field name.
    current_payload = _serialize_dagster_namedtuple(
        MyThing("new"), whitelist_map=current_map
    )
    from_current = _deserialize_json(current_payload, whitelist_map=current_map)
    assert from_current.new_name == "new"
Exemple #2
0
def test_set():
    """Round-trip a namedtuple holding a ``set`` and a ``frozenset``.

    Also checks that hashing the serialized form of a larger value gives the
    same result before and after a serialize/deserialize round trip.
    """
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class HasSets(namedtuple("_HasSets", "reg_set frozen_set")):
        def __new__(cls, reg_set, frozen_set):
            set_param(reg_set, "reg_set")
            inst_param(frozen_set, "frozen_set", frozenset)
            return super(HasSets, cls).__new__(cls, reg_set, frozen_set)

    original = HasSets({1, 2, 3, "3"}, frozenset([4, 5, 6, "6"]))

    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json(payload, whitelist_map=wmap)
    assert original == restored

    # verify that set elements are serialized in a consistent order so that
    # equal objects always have a consistent serialization / snapshot ID
    letters = string.ascii_lowercase
    big = HasSets(set(letters), frozenset(letters))

    snap_id = hash_str(_serialize_dagster_namedtuple(big, whitelist_map=wmap))

    # Serialize again (independently of the call above), rehydrate, and
    # serialize the rehydrated value once more.
    second_payload = _serialize_dagster_namedtuple(big, whitelist_map=wmap)
    rehydrated = _deserialize_json(second_payload, whitelist_map=wmap)
    roundtrip_snap_id = hash_str(
        _serialize_dagster_namedtuple(rehydrated, whitelist_map=wmap)
    )

    assert snap_id == roundtrip_snap_id
Exemple #3
0
def test_skip_when_empty():
    """Check that ``skip_when_empty`` keeps the serialized hash stable.

    Three versions of ``SameSnapshotTuple`` are registered in turn against the
    same whitelist map: the original with only ``foo``; one with an added
    ``bar`` field and the default serializer; and one with ``bar`` plus a
    serializer whose ``skip_when_empty`` returns ``{"bar"}``.
    """
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo")):
        def __new__(cls, foo):
            return super(SameSnapshotTuple, cls).__new__(cls, foo)  # pylint: disable=bad-super-call

    old_serialized = _serialize_dagster_namedtuple(
        SameSnapshotTuple(foo="A"), whitelist_map=wmap
    )
    old_snapshot = hash_str(old_serialized)

    # Without setting skip_when_empty, the ID changes

    @_whitelist_for_serdes(whitelist_map=wmap)  # pylint: disable=function-redefined
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar)

    plain_payload = _serialize_dagster_namedtuple(
        SameSnapshotTuple(foo="A"), whitelist_map=wmap
    )
    assert hash_str(plain_payload) != old_snapshot

    # By setting a custom serializer and skip_when_empty, the snapshot stays the same
    # as long as the new field is None

    class SkipWhenEmptySerializer(DefaultNamedTupleSerializer):
        @classmethod
        def skip_when_empty(cls) -> Set[str]:
            return {"bar"}

    @_whitelist_for_serdes(whitelist_map=wmap,
                           serializer=SkipWhenEmptySerializer)  # pylint: disable=function-redefined
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar)

    # Each of these "empty" values for bar must hash the same as the original.
    for empty_bar in (None, [], {}, set()):
        candidate = SameSnapshotTuple(foo="A", bar=empty_bar)
        candidate_payload = _serialize_dagster_namedtuple(
            candidate, whitelist_map=wmap
        )
        assert old_snapshot == hash_str(candidate_payload)

        # The original payload (no bar key) still loads, with bar defaulted.
        rehydrated = _deserialize_json(old_serialized, whitelist_map=wmap)
        assert rehydrated.foo == "A"
        assert rehydrated.bar is None

    populated = SameSnapshotTuple(foo="A", bar="B")
    assert populated.foo == "A"
    assert populated.bar == "B"
Exemple #4
0
def test_forward_compat_serdes_new_field_with_default():
    """Load a payload written before a defaulted field was added to the class.

    ``Quux`` is serialized with fields ``foo`` and ``bar``, then re-registered
    in the same map with an extra ``baz`` field defaulting to ``None``. The
    old payload must deserialize with ``baz`` set to ``None``.
    """
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar")):
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)  # pylint: disable=bad-super-call

    assert wmap.has_tuple_entry("Quux")
    registered, _ = wmap.get_tuple_entry("Quux")
    assert registered is Quux

    quux = Quux("zip", "zow")
    payload = _serialize_dagster_namedtuple(quux, whitelist_map=wmap)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz=None):
            return super(Quux, cls).__new__(cls, foo, bar, baz=baz)  # pylint: disable=bad-super-call

    # The map entry now points at the redefined class.
    assert wmap.has_tuple_entry("Quux")
    registered, _ = wmap.get_tuple_entry("Quux")
    assert registered is Quux

    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)

    assert restored != quux
    assert restored.foo == quux.foo
    assert restored.bar == quux.bar
    assert restored.baz is None
0
def test_to_storage_value():
    """Check that ``value_to_storage_dict`` can substitute a different type.

    ``DeprecatedAlphabet`` is registered with a serializer that writes each
    instance out as a ``SubstituteAlphabet``. After a round trip, both the
    outer value and the nested value in field ``c`` must come back as
    ``SubstituteAlphabet`` and not ``DeprecatedAlphabet``.
    """
    test_map = WhitelistMap()

    class MySerializer(DefaultNamedTupleSerializer):
        @staticmethod
        def value_to_storage_dict(value, whitelist_map):
            # Forward the map this hook was called with instead of closing
            # over the enclosing test's map, so the override honours its own
            # argument. SubstituteAlphabet is resolved at call time, after it
            # has been defined below.
            return DefaultNamedTupleSerializer.value_to_storage_dict(
                SubstituteAlphabet(value.a, value.b, value.c), whitelist_map
            )

    @_whitelist_for_serdes(whitelist_map=test_map, serializer=MySerializer)
    class DeprecatedAlphabet(namedtuple("_DeprecatedAlphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(DeprecatedAlphabet, cls).__new__(cls, a, b, c)

    @_whitelist_for_serdes(whitelist_map=test_map)
    class SubstituteAlphabet(namedtuple("_SubstituteAlphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(SubstituteAlphabet, cls).__new__(cls, a, b, c)

    # Nest a deprecated value inside another so the substitution is checked
    # at more than one level.
    nested = DeprecatedAlphabet(None, None, "_C")
    deprecated = DeprecatedAlphabet("A", "B", nested)
    serialized = _serialize_dagster_namedtuple(deprecated, whitelist_map=test_map)
    alphabet = _deserialize_json_to_dagster_namedtuple(serialized, whitelist_map=test_map)
    assert not isinstance(alphabet, DeprecatedAlphabet)
    assert isinstance(alphabet, SubstituteAlphabet)
    assert not isinstance(alphabet.c, DeprecatedAlphabet)
    assert isinstance(alphabet.c, SubstituteAlphabet)
Exemple #6
0
def test_descent_path():
    """Check that serdes errors report the path to the offending value.

    Serializing a nested structure containing an unregistered namedtuple must
    raise ``SerializationError``; deserializing a payload against a map that
    lacks the needed entry must raise ``DeserializationError``. Both messages
    are expected to contain the same descent path.
    """
    expected_path = re.escape("Descent path: <root:dict>.a.b[2].c")

    class Foo(NamedTuple):
        bar: int

    # Foo is never registered, so serialization should fail at its position.
    with pytest.raises(SerializationError, match=expected_path):
        serialize_value({"a": {"b": [{}, {}, {"c": Foo(1)}]}})

    test_map = WhitelistMap.create()
    blank_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class Fizz(NamedTuple):
        buzz: int

    nested = {"a": {"b": [{}, {}, {"c": Fizz(1)}]}}
    ser = _serialize_dagster_namedtuple(nested, whitelist_map=test_map)

    # Fizz is registered in test_map only; blank_map has no entry for it.
    with pytest.raises(DeserializationError, match=expected_path):
        _deserialize_json(ser, whitelist_map=blank_map)
Exemple #7
0
def test_forward_compat():
    """Read a payload written by a newer class version using an older map.

    The newer ``Quux`` carries an extra ``foo`` field holding a ``Foo``, a
    type registered only in the new map. Deserializing against the old map
    must still yield the ``bar`` and ``baz`` values.
    """
    legacy_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=legacy_map)
    class Quux(namedtuple("_Quux", "bar baz")):
        def __new__(cls, bar, baz):
            return super().__new__(cls, bar, baz)

    # new version has a new field with a new type
    current_map = WhitelistMap.create()

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=current_map)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz):
            return super().__new__(cls, foo, bar, baz)

    @_whitelist_for_serdes(whitelist_map=current_map)
    class Foo(namedtuple("_Foo", "wow")):
        def __new__(cls, wow):
            return super().__new__(cls, wow)

    # write from new
    payload = _serialize_dagster_namedtuple(
        Quux(foo=Foo("wow"), bar="bar", baz="baz"),
        whitelist_map=current_map,
    )

    # read from old, foo ignored
    restored = _deserialize_json(payload, whitelist_map=legacy_map)
    assert restored.bar == "bar"
    assert restored.baz == "baz"
Exemple #8
0
def test_backward_compat_serdes():
    """Load a payload that contains a field the current class no longer has.

    ``Quux`` is serialized with ``foo``, ``bar`` and ``baz``, then
    re-registered in the same map without ``baz``. The old payload must
    deserialize with ``foo`` and ``bar`` intact and no ``baz`` attribute.
    """
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar baz")):
        def __new__(cls, foo, bar, baz):
            return super(Quux, cls).__new__(cls, foo, bar, baz)  # pylint: disable=bad-super-call

    quux = Quux("zip", "zow", "whoopie")
    payload = _serialize_dagster_namedtuple(quux, whitelist_map=wmap)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", "foo bar")):
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)  # pylint: disable=bad-super-call

    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)

    assert restored != quux
    assert restored.foo == quux.foo
    assert restored.bar == quux.bar
    assert not hasattr(restored, "baz")
Exemple #9
0
def test_long_int():
    """Round-trip a 20-digit integer and check its value is preserved."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class NumHolder(NamedTuple):
        num: int

    holder = NumHolder(98765432109876543210)
    payload = _serialize_dagster_namedtuple(holder, wmap)
    restored = _deserialize_json(payload, wmap)
    assert holder.num == restored.num
Exemple #10
0
def test_persistent_tuple():
    """Round-trip a simple three-field namedtuple and check equality."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Alphabet(namedtuple("_Alphabet", "a b c")):
        def __new__(cls, a, b, c):
            return super(Alphabet, cls).__new__(cls, a, b, c)

    original = Alphabet(a="A", b="B", c="C")
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)
    assert original == restored
Exemple #11
0
def test_set():
    """Round-trip a namedtuple holding a ``set`` and a ``frozenset``."""
    wmap = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class HasSets(namedtuple("_HasSets", "reg_set frozen_set")):
        def __new__(cls, reg_set, frozen_set):
            set_param(reg_set, "reg_set")
            inst_param(frozen_set, "frozen_set", frozenset)
            return super(HasSets, cls).__new__(cls, reg_set, frozen_set)

    # Mixed int/str members in both containers.
    original = HasSets({1, 2, 3, "3"}, frozenset([4, 5, 6, "6"]))

    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json_to_dagster_namedtuple(payload, whitelist_map=wmap)
    assert original == restored
Exemple #12
0
def test_namedtuple_name_map():
    """Check the serialized-name and deserialized-name registrations.

    After registering ``"SerializedThing"`` as the serialized name for
    ``Thing``, the payload's ``__class__`` must carry that name.
    Deserialization must fail until the reverse mapping is registered, and
    succeed afterwards.
    """
    name_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=name_map)
    class Thing(NamedTuple):
        name: str

    name_map.register_serialized_name("Thing", "SerializedThing")
    thing = Thing("foo")

    payload = _serialize_dagster_namedtuple(thing, name_map)
    written_class = seven.json.loads(payload)["__class__"]
    assert written_class == "SerializedThing"

    # No reverse mapping registered yet.
    with pytest.raises(DeserializationError):
        _deserialize_json(payload, name_map)

    name_map.register_deserialized_name("SerializedThing", "Thing")
    assert _deserialize_json(payload, name_map) == thing
Exemple #13
0
 def get_id(self):
     """Return ``hash_str`` of this object serialized against ``new_map``."""
     return hash_str(
         _serialize_dagster_namedtuple(self, whitelist_map=new_map))
Exemple #14
0
def test_namedtuple_backcompat():
    """Check two-way compatibility between a renamed class and its old form.

    ``OldThing`` (field ``old_name``) lives in one whitelist map. ``NewThing``
    (field ``new_name``) lives in another, with a serializer that writes the
    old storage layout and class name and reads ``old_name`` back into
    ``new_name``. The test asserts that both classes produce identical
    payloads and hashes, and that each map can read what the other wrote.
    """
    old_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=old_map)
    class OldThing(NamedTuple):
        old_name: str

        def get_id(self):
            return hash_str(
                _serialize_dagster_namedtuple(self, whitelist_map=old_map))

    # create the old things
    old_thing = OldThing("thing")
    old_thing_id = old_thing.get_id()
    old_thing_serialized = _serialize_dagster_namedtuple(old_thing, old_map)

    new_map = WhitelistMap.create()

    class ThingSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(cls, storage_dict, klass, args_for_class,
                                    whitelist_map, descent_path):
            # Unpack every stored value, extending the descent path per key.
            unpacked = {}
            for key, value in storage_dict.items():
                unpacked[key] = unpack_inner_value(
                    value, whitelist_map, f"{descent_path}.{key}")

            # Keep only the keys the target class accepts.
            accepted = {
                key: value
                for key, value in unpacked.items() if key in args_for_class
            }
            # typical pattern is to use the same serialization format from an old field and passing
            # it in as a new field
            return klass(**accepted, new_name=unpacked.get("old_name"))

        @classmethod
        def value_to_storage_dict(
            cls,
            value,
            whitelist_map,
            descent_path,
        ):
            storage = super().value_to_storage_dict(value, whitelist_map,
                                                    descent_path)
            name = storage.get("new_name") or storage.get("old_name")
            if "new_name" in storage:
                del storage["new_name"]
            # typical pattern is to keep the old serialization format: store
            # the new field's value under the old field name
            storage["old_name"] = name
            storage["__class__"] = "OldThing"  # persist using old class name
            return storage

    @_whitelist_for_serdes(whitelist_map=new_map, serializer=ThingSerializer)
    class NewThing(NamedTuple):
        new_name: str

        def get_id(self):
            return hash_str(
                _serialize_dagster_namedtuple(self, whitelist_map=new_map))

    # exercising the old serialization format
    register_serdes_tuple_fallbacks({"OldThing": NewThing},
                                    whitelist_map=new_map)

    new_thing = NewThing("thing")
    new_thing_id = new_thing.get_id()
    new_thing_serialized = _serialize_dagster_namedtuple(new_thing, new_map)

    assert new_thing_id == old_thing_id
    assert new_thing_serialized == old_thing_serialized

    # ensure that the new serializer can correctly interpret old serialized data
    from_old_payload = _deserialize_json(old_thing_serialized, new_map)
    assert isinstance(from_old_payload, NewThing)
    assert from_old_payload.get_id() == new_thing_id

    # ensure that the new things serialized can still be read by old code
    from_new_payload = _deserialize_json(new_thing_serialized, old_map)
    assert isinstance(from_new_payload, OldThing)
    assert from_new_payload.get_id() == old_thing_id