def test_long_int():
    """An integer wider than 64 bits survives a serialize/deserialize roundtrip."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class NumHolder(NamedTuple):
        num: int

    original = NumHolder(98765432109876543210)
    payload = _serialize_dagster_namedtuple(original, wmap)
    restored = _deserialize_json(payload, wmap)

    assert original.num == restored.num
def test_descent_path():
    """Serialization and deserialization errors both report the nested descent path."""
    expected_path = re.escape("Descent path: <root:dict>.a.b[2].c")

    # Foo is never registered with a whitelist map in this test.
    class Foo(NamedTuple):
        bar: int

    with pytest.raises(SerializationError, match=expected_path):
        serialize_value({"a": {"b": [{}, {}, {"c": Foo(1)}]}})

    test_map = WhitelistMap.create()
    blank_map = WhitelistMap.create()

    # Fizz is registered only with test_map; blank_map stays empty.
    @_whitelist_for_serdes(whitelist_map=test_map)
    class Fizz(NamedTuple):
        buzz: int

    nested = {"a": {"b": [{}, {}, {"c": Fizz(1)}]}}
    ser = _serialize_dagster_namedtuple(nested, whitelist_map=test_map)

    with pytest.raises(DeserializationError, match=expected_path):
        _deserialize_json(ser, whitelist_map=blank_map)
def test_enum_backcompat():
    """An enum stored under a legacy name roundtrips in both current and legacy maps.

    ``MyEnum`` is serialized under the storage name ``OldEnum`` by a custom
    serializer. The test checks that the current whitelist map (with a
    fallback registered for ``OldEnum``) and a separate "legacy" whitelist map
    (where a class literally named ``OldEnum`` is registered) can both read
    the stored value.
    """
    test_env = WhitelistMap.create()

    class MyEnumSerializer(EnumSerializer):
        @classmethod
        def value_from_storage_str(cls, storage_str, klass):
            return getattr(klass, storage_str)

        @classmethod
        def value_to_storage_str(cls, value, whitelist_map, descent_path):
            # str(value) has the form "<ClassName>.<MEMBER>"; keep the member
            # part and replace the class name.
            val_as_str = str(value)
            actual_enum_val = val_as_str.split(".")[1:]
            backcompat_name = (
                "OldEnum"  # Simulate changing the storage name to some legacy backcompat name
            )
            return ".".join([backcompat_name, *actual_enum_val])

    @_whitelist_for_serdes(test_env, serializer=MyEnumSerializer)
    class MyEnum(Enum):
        RED = "color.red"
        # Must differ from RED: a duplicated value would make BLUE an alias of
        # RED instead of a distinct member. Matches OldEnum.BLUE below.
        BLUE = "color.blue"

    # Ensure that serdes roundtrip preserves value
    register_serdes_enum_fallbacks({"OldEnum": MyEnum}, whitelist_map=test_env)

    my_enum = MyEnum("color.red")
    enum_json = serialize_value(my_enum, whitelist_map=test_env)
    result = _deserialize_json(enum_json, test_env)
    assert result == my_enum

    # ensure that "legacy" environment can correctly interpret enum stored under legacy name.
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class OldEnum(Enum):
        RED = "color.red"
        BLUE = "color.blue"

    result = _deserialize_json(enum_json, legacy_env)
    old_enum = OldEnum("color.red")
    assert old_enum == result
def test_persistent_tuple():
    """A whitelisted namedtuple subclass compares equal after a roundtrip."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Alphabet(namedtuple("_Alphabet", ["a", "b", "c"])):
        def __new__(cls, a, b, c):
            return super(Alphabet, cls).__new__(cls, a, b, c)

    original = Alphabet(a="A", b="B", c="C")
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)
    restored = _deserialize_json(payload, whitelist_map=wmap)

    assert original == restored
def test_pipeline_run_status_dagster_run_status():
    """A serialized DagsterRunStatus loads as PipelineRunStatus in a legacy map."""
    # Serialize with the current code.
    current_status = DagsterRunStatus("QUEUED")
    payload = serialize_value(current_status)

    # Deserialize with a map that only knows the "legacy" enum.
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class PipelineRunStatus(Enum):
        QUEUED = "QUEUED"

    loaded = _deserialize_json(payload, legacy_env)

    assert isinstance(loaded, PipelineRunStatus)
    assert loaded.value == current_status.value
def test_solid_handle_node_handle():
    """A serialized NodeHandle loads as SolidHandle in a legacy map."""
    # Serialize with the current code.
    current_handle = NodeHandle("test", None)
    payload = serialize_dagster_namedtuple(current_handle)

    # Deserialize with a map that only knows the "legacy" class.
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", ["name", "parent"])):
        pass

    loaded = _deserialize_json(payload, legacy_env)

    assert isinstance(loaded, SolidHandle)
    assert loaded.name == current_handle.name
def build_legacy_whitelist_map():
    """Build a fresh whitelist map populated with "legacy" origin classes.

    Returns:
        A 4-tuple ``(legacy_env, ExternalJobOrigin, ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin)``: the new map followed by the
        three classes registered with it.
    """
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class ExternalJobOrigin(
        namedtuple("_ExternalJobOrigin", ["external_repository_origin", "job_name"])
    ):
        def get_id(self):
            return create_snapshot_id(self)

    @_whitelist_for_serdes(legacy_env)
    class ExternalRepositoryOrigin(
        namedtuple(
            "_ExternalRepositoryOrigin",
            ["repository_location_origin", "repository_name"],
        )
    ):
        def get_id(self):
            return create_snapshot_id(self)

    class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def skip_when_empty(cls):
            return {"use_ssl"}

    @_whitelist_for_serdes(whitelist_map=legacy_env, serializer=GrpcServerOriginSerializer)
    class GrpcServerRepositoryLocationOrigin(
        namedtuple(
            "_GrpcServerRepositoryLocationOrigin",
            ["host", "port", "socket", "location_name", "use_ssl"],
        )
    ):
        def __new__(cls, host, port=None, socket=None, location_name=None, use_ssl=None):
            return super(GrpcServerRepositoryLocationOrigin, cls).__new__(
                cls, host, port, socket, location_name, use_ssl
            )

    return (
        legacy_env,
        ExternalJobOrigin,
        ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin,
    )
def test_namedtuple_name_map():
    """Registered serialized/deserialized names control the stored ``__class__``."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Thing(NamedTuple):
        name: str

    wmap.register_serialized_name("Thing", "SerializedThing")

    original = Thing("foo")
    payload = _serialize_dagster_namedtuple(original, wmap)
    stored_class = seven.json.loads(payload)["__class__"]
    assert stored_class == "SerializedThing"

    # No reverse mapping is registered yet, so loading fails.
    with pytest.raises(DeserializationError):
        _deserialize_json(payload, wmap)

    wmap.register_deserialized_name("SerializedThing", "Thing")
    assert _deserialize_json(payload, wmap) == original
def test_serdes_enum_backcompat():
    """A custom enum serializer can remap a stored member name on load."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Corge(Enum):
        FOO = 1
        BAR = 2

    assert wmap.has_enum_entry("Corge")

    original_member = Corge.FOO

    # Pack with the first definition of Corge, before it is redefined below.
    packed = pack_inner_value(original_member, whitelist_map=wmap, descent_path="")

    class CorgeBackCompatSerializer(DefaultEnumSerializer):
        @classmethod
        def value_from_storage_str(cls, storage_str, klass):
            # Stored "FOO" is read back as "FOO_FOO"; other names pass through.
            remapped = "FOO_FOO" if storage_str == "FOO" else storage_str
            return super().value_from_storage_str(remapped, klass)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap, serializer=CorgeBackCompatSerializer)
    class Corge(Enum):
        BAR = 2
        BAZ = 3
        FOO_FOO = 4

    unpacked = unpack_inner_value(packed, whitelist_map=wmap, descent_path="")

    assert unpacked != original_member
    assert unpacked == Corge.FOO_FOO
def test_backward_compat_serdes():
    """Data written with a removed field still loads into the narrower class."""
    wmap = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", ["foo", "bar", "baz"])):
        def __new__(cls, foo, bar, baz):
            return super(Quux, cls).__new__(cls, foo, bar, baz)  # pylint: disable=bad-super-call

    # Instantiate and serialize while the three-field Quux is still bound.
    original = Quux("zip", "zow", "whoopie")
    payload = _serialize_dagster_namedtuple(original, whitelist_map=wmap)

    # pylint: disable=function-redefined
    @_whitelist_for_serdes(whitelist_map=wmap)
    class Quux(namedtuple("_Quux", ["foo", "bar"])):  # pylint: disable=bad-super-call
        def __new__(cls, foo, bar):
            return super(Quux, cls).__new__(cls, foo, bar)

    loaded = _deserialize_json(payload, whitelist_map=wmap)

    assert loaded != original
    assert loaded.foo == original.foo
    assert loaded.bar == original.bar
    assert not hasattr(loaded, "baz")
def test_namedtuple_backcompat():
    """A renamed class/field stays interchangeable with its old serialized form.

    ``NewThing`` (field ``new_name``) uses a custom serializer that writes and
    reads the storage format of ``OldThing`` (field ``old_name``). The test
    checks that ids and serialized payloads match and that each whitelist map
    can load data written with the other.
    """
    old_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=old_map)
    class OldThing(NamedTuple):
        old_name: str

        def get_id(self):
            return hash_str(_serialize_dagster_namedtuple(self, whitelist_map=old_map))

    # create the old things
    old_thing = OldThing("thing")
    old_thing_id = old_thing.get_id()
    old_thing_serialized = _serialize_dagster_namedtuple(old_thing, old_map)

    new_map = WhitelistMap.create()

    class ThingSerializer(DefaultNamedTupleSerializer):
        @classmethod
        def value_from_storage_dict(
            cls, storage_dict, klass, args_for_class, whitelist_map, descent_path
        ):
            unpacked = {
                field: unpack_inner_value(packed, whitelist_map, f"{descent_path}.{field}")
                for field, packed in storage_dict.items()
            }
            known_kwargs = {
                field: val for field, val in unpacked.items() if field in args_for_class
            }
            # Feed the value stored under the old field name into the new field.
            return klass(**known_kwargs, new_name=unpacked.get("old_name"))

        @classmethod
        def value_to_storage_dict(
            cls,
            value,
            whitelist_map,
            descent_path,
        ):
            storage = super().value_to_storage_dict(value, whitelist_map, descent_path)
            name = storage.get("new_name") or storage.get("old_name")
            storage.pop("new_name", None)
            # Persist the value under the old field name so the stored format
            # stays the same as before the rename.
            storage["old_name"] = name
            storage["__class__"] = "OldThing"  # persist using old class name
            return storage

    @_whitelist_for_serdes(whitelist_map=new_map, serializer=ThingSerializer)
    class NewThing(NamedTuple):
        new_name: str

        def get_id(self):
            return hash_str(_serialize_dagster_namedtuple(self, whitelist_map=new_map))

    # exercising the old serialization format
    register_serdes_tuple_fallbacks({"OldThing": NewThing}, whitelist_map=new_map)

    new_thing = NewThing("thing")
    new_thing_id = new_thing.get_id()
    new_thing_serialized = _serialize_dagster_namedtuple(new_thing, new_map)

    assert new_thing_id == old_thing_id
    assert new_thing_serialized == old_thing_serialized

    # ensure that the new serializer can correctly interpret old serialized data
    old_thing_deserialized = _deserialize_json(old_thing_serialized, new_map)
    assert isinstance(old_thing_deserialized, NewThing)
    assert old_thing_deserialized.get_id() == new_thing_id

    # ensure that the new things serialized can still be read by old code
    new_thing_deserialized = _deserialize_json(new_thing_serialized, old_map)
    assert isinstance(new_thing_deserialized, OldThing)
    assert new_thing_deserialized.get_id() == old_thing_id
def serdes_test_class(klass):
    """Register ``klass`` with a newly created whitelist map and return the decorated result."""
    register = _whitelist_for_serdes(whitelist_map=WhitelistMap.create())
    return register(klass)
def test_legacy_event_log_load():
    """A current EventLogEntry can be loaded by a legacy EventLogEntry definition.

    Ensures EventLogEntry 0.14.3+ can still be loaded by older dagster
    versions, to avoid downgrades etc. from creating operational issues.
    """
    legacy_env = WhitelistMap.create()

    # snapshot of EventLogEntry pre commit ea19544
    @_whitelist_for_serdes(
        whitelist_map=legacy_env,
        storage_name="EventLogEntry",  # use this to avoid collision with current EventLogEntry
    )
    class OldEventLogEntry(  # pylint: disable=unused-variable
        NamedTuple(
            "_OldEventLogEntry",
            [
                ("error_info", Optional[SerializableErrorInfo]),
                ("message", str),
                ("level", Union[str, int]),
                ("user_message", str),
                ("run_id", str),
                ("timestamp", float),
                ("step_key", Optional[str]),
                ("pipeline_name", Optional[str]),
                ("dagster_event", Optional[DagsterEvent]),
            ],
        )
    ):
        def __new__(
            cls,
            error_info,
            message,
            level,
            user_message,
            run_id,
            timestamp,
            step_key=None,
            pipeline_name=None,
            dagster_event=None,
            job_name=None,
        ):
            # job_name is accepted as a fallback for pipeline_name.
            pipeline_name = pipeline_name or job_name
            return super().__new__(
                cls,
                check.opt_inst_param(error_info, "error_info", SerializableErrorInfo),
                check.str_param(message, "message"),
                level,  # coerce_valid_log_level call omitted
                check.str_param(user_message, "user_message"),
                check.str_param(run_id, "run_id"),
                check.float_param(timestamp, "timestamp"),
                check.opt_str_param(step_key, "step_key"),
                check.opt_str_param(pipeline_name, "pipeline_name"),
                check.opt_inst_param(dagster_event, "dagster_event", DagsterEvent),
            )

    # current event log entry
    current_entry = EventLogEntry(
        user_message="test 1 2 3",
        error_info=None,
        level="debug",
        run_id="fake_run_id",
        timestamp=time.time(),
    )

    payload = serialize_dagster_namedtuple(current_entry)
    loaded = _deserialize_json(payload, legacy_env)
    assert loaded.message is not None