def test_deserialize_edges():
    """
    Tests that edges are appropriately deserialized, even if they involve keys.

    Also tests that tasks are deserialized in a way that reuses them in edges:
    the expected edges are built from the task objects found on the
    deserialized flow, so the comparison relies on the loaded edges referring
    to those same tasks.
    """

    class ArgTask(Task):
        def run(self, x):
            return x

    flow = Flow(name="test")
    first, second, third = Task("a"), Task("b"), ArgTask("c")
    flow.add_edge(first, second)
    flow.add_edge(second, third, key="x")
    flow.add_edge(first, third, mapped=True)

    payload = FlowSchema().dump(flow)
    round_tripped = FlowSchema().load(payload)

    # Sorting by name lines the loaded tasks up with "a", "b", "c" above.
    d1, d2, d3 = sorted(round_tripped.tasks, key=lambda task: task.name)
    expected_edges = {
        Edge(d1, d2),
        Edge(d2, d3, key="x"),
        Edge(d1, d3, mapped=True),
    }
    assert round_tripped.edges == expected_edges
def test_serialize_container_environment():
    """Round-trip a flow with Docker storage; check storage type and registry URL."""
    storage = prefect.environments.storage.Docker(
        base_image="a",
        python_dependencies=["b", "c"],
        registry_url="f",
    )
    flow = Flow(name="test", storage=storage)

    payload = FlowSchema().dump(flow)
    round_tripped = FlowSchema().load(payload)

    loaded_storage = round_tripped.storage
    assert isinstance(loaded_storage, prefect.environments.storage.Docker)
    assert loaded_storage.registry_url == storage.registry_url
def test_serialize_container_environment():
    """Round-trip a flow with a DockerEnvironment; check type, base image and registry URL."""
    env = prefect.environments.DockerEnvironment(
        base_image="a",
        python_dependencies=["b", "c"],
        registry_url="f",
    )
    flow = Flow(name="test", environment=env)

    payload = FlowSchema().dump(flow)
    round_tripped = FlowSchema().load(payload)

    loaded_env = round_tripped.environment
    assert isinstance(loaded_env, prefect.environments.DockerEnvironment)
    assert loaded_env.base_image == env.base_image
    assert loaded_env.registry_url == env.registry_url
def test_reference_tasks():
    """Reference tasks set on a flow are still the reference tasks after a round trip."""
    x, y, z = Task("x"), Task("y"), Task("z")
    original = Flow(name="test", tasks=[x, y, z])
    original.set_reference_tasks([y])
    assert original.reference_tasks() == {y}

    payload = FlowSchema().dump(original)
    restored = FlowSchema().load(payload)

    # The restored flow holds new task objects, so match the reference task by name.
    tasks_named_y = {task for task in restored.tasks if task.name == "y"}
    assert restored.reference_tasks() == tasks_named_y
def test_deserialize_flow_subclass_is_flow_but_not_flow_subclass():
    """A serialized Flow subclass records its type but loads back as a plain Flow."""

    class NewFlow(Flow):
        pass

    payload = FlowSchema().dump(NewFlow(name="test"))
    recorded_type = payload["type"]
    assert recorded_type.endswith("<locals>.NewFlow")

    restored = FlowSchema().load(payload)
    assert isinstance(restored, Flow)
    assert not isinstance(restored, NewFlow)
def test_deserialize_with_parameters_key():
    """Parameters keep their name, required flag and default across a round trip."""
    original = Flow(name="test")
    original.add_task(Parameter("x"))

    payload = FlowSchema().dump(original)
    restored = FlowSchema().load(payload)

    original_names = {param.name for param in original.parameters()}
    restored_names = {param.name for param in restored.parameters()}
    assert restored_names == original_names

    original_details = {
        (param.name, param.required, param.default)
        for param in original.parameters()
    }
    restored_details = {
        (param.name, param.required, param.default)
        for param in restored.parameters()
    }
    assert original_details == restored_details
def test_deserialize_schedule_doesnt_mutate_original():
    """Loading a serialized flow must leave the serialized filter kwargs as strings."""
    time_filter = prefect.schedules.filters.between_times(
        datetime.time(1), datetime.time(2)
    )
    schedule = prefect.schedules.Schedule(clocks=[], filters=[time_filter])
    flow = Flow(name="test", schedule=schedule)

    serialized = FlowSchema().dump(flow)
    # The loaded flow itself is not inspected; the load is performed only so
    # that any in-place change to `serialized` would show up below.
    FlowSchema().load(serialized)

    filter_kwargs = serialized["schedule"]["filters"][0]["kwargs"]
    assert isinstance(filter_kwargs["start"], str)
    assert isinstance(filter_kwargs["end"], str)
def flow(self) -> "prefect.Flow":
    """
    Build a `Flow` object from the serialized flow stored on this instance.

    The result is not expected to carry all of the data of the flow object that
    was originally registered; the backend is free to manipulate the serialized
    flow for optimization.

    Returns:
        - prefect.Flow: the flow loaded from `self.serialized_flow`
    """
    # Deserialization happens lazily (in a property) because it is the step
    # most likely to fail, and bad flow data should not break the whole view.
    schema = FlowSchema()
    # Load partially and drop unknown fields rather than rejecting them.
    load_options = dict(partial=True, unknown=marshmallow.EXCLUDE)
    return schema.load(data=self.serialized_flow, **load_options)
def test_parameters():
    """
    Serializing a flow exposes its parameters under the "parameters" key, and
    each serialized entry loads back into a `Parameter` via `ParameterSchema`.
    """
    f = Flow(name="test")
    x = Parameter("x")
    y = Parameter("y", default=5)
    f.add_task(x)
    f.add_task(y)

    serialized = FlowSchema().dump(f)
    assert "parameters" in serialized

    # Asserting on a bare list only checks that the list is non-empty, so the
    # isinstance results were never verified. Keep the non-empty check
    # explicit and verify every entry with all().
    assert serialized["parameters"]
    assert all(
        isinstance(ParameterSchema().load(p), Parameter)
        for p in serialized["parameters"]
    )
def _from_flow_data(cls, flow_data: dict, **kwargs: Any) -> "FlowView":
    """
    Build a `FlowView` out of a dict of serialized flow data.

    Serialized pieces (the flow, its storage and its run config) are loaded
    into their Prefect types before being handed to the constructor.

    Args:
        - flow_data: The dict of serialized data; it is copied, so the caller's
            dict is not modified by the pops below
        - **kwargs: Additional kwargs are passed to __init__ and override
            attributes derived from `flow_data`

    Returns:
        - FlowView: the instance created with `cls(...)`
    """
    # Work on a shallow copy so popping keys leaves the caller's dict intact.
    remaining = flow_data.copy()

    flow_id = remaining.pop("id")
    flow_group = remaining.pop("flow_group")
    flow_group_labels = flow_group["labels"]
    project_name = remaining.pop("project")["name"]

    # "serialized_flow" is read but not popped, so it is also forwarded below.
    deserialized_flow = FlowSchema().load(data=remaining["serialized_flow"])
    storage = StorageSchema().load(remaining.pop("storage"))
    run_config = RunConfigSchema().load(remaining.pop("run_config"))

    # Whatever is left in `remaining` is forwarded as-is to the constructor.
    derived_args = dict(
        flow_id=flow_id,
        project_name=project_name,
        flow=deserialized_flow,
        storage=storage,
        flow_group_labels=flow_group_labels,
        run_config=run_config,
        **remaining,
    )

    # Explicit kwargs take precedence over anything derived from the data.
    init_args = {**derived_args, **kwargs}
    return cls(**init_args)
def test_deserialize_schedule():
    """A cron schedule yields the same next five run times after a round trip."""
    cron = prefect.schedules.CronSchedule("0 0 * * *")
    original = Flow(name="test", schedule=cron)

    payload = FlowSchema().dump(original)
    restored = FlowSchema().load(payload)

    assert restored.schedule.next(5) == original.schedule.next(5)
def test_deserialize_serialized_flow_after_build():
    """The output of `flow.serialize(build=True)` can be loaded back into a Flow."""
    local_env = prefect.environments.LocalEnvironment()
    built_payload = Flow(name="test", environment=local_env).serialize(build=True)

    restored = FlowSchema().load(built_payload)
    assert isinstance(restored, Flow)
def test_deserialize_flow():
    """A dumped flow loads back as a Flow with the same name."""
    payload = FlowSchema().dump(Flow(name="n"))
    restored = FlowSchema().load(payload)

    assert isinstance(restored, Flow)
    assert restored.name == "n"
def test_serialize_empty_dict_contains_only_basic_fields():
    """Dumping an empty dict yields only the version and type fields."""
    dumped = FlowSchema().dump({})
    expected = {
        "__version__": prefect.__version__,
        "type": "builtins.dict",
    }
    assert dumped == expected
def test_deserialize_serialized_flow_after_build(tmpdir):
    """A flow with Local storage, serialized with `build=True`, loads back as a Flow."""
    local_storage = prefect.environments.storage.Local(tmpdir)
    built_payload = Flow(name="test", storage=local_storage).serialize(build=True)

    restored = FlowSchema().load(built_payload)
    assert isinstance(restored, Flow)
def test_serialize_flow():
    """The dumped representation of a flow carries the flow's name."""
    flow = Flow(name="n")
    payload = FlowSchema().dump(flow)
    assert payload["name"] == "n"
def test_deserialize_id():
    """A flow's id is unchanged by a dump/load round trip."""
    original = Flow(name="test")
    payload = FlowSchema().dump(original)
    restored = FlowSchema().load(payload)
    assert restored.id == original.id
def test_old_flows_deserialize(flow_json):
    """A flow payload stored as JSON at `flow_json` loads into an object with a truthy name."""
    with open(flow_json, "r") as handle:
        payload = json.load(handle)

    loaded = FlowSchema().load(payload)
    assert loaded.name
def test_deserialize_tasks():
    """A round-tripped flow has as many tasks as the original."""
    original = Flow(name="test", tasks=[Task(name) for name in ["a", "b", "c"]])

    payload = FlowSchema().dump(original)
    restored = FlowSchema().load(payload)

    assert len(restored.tasks) == len(original.tasks)
def test_serialize_empty_dict():
    """Dumping an empty dict produces a truthy (non-empty) result."""
    dumped = FlowSchema().dump({})
    assert dumped
f.set_reference_tasks([y]) assert f.reference_tasks() == {y} f2 = FlowSchema().load(f.serialize()) assert f2.reference_tasks() == {t for t in f2.tasks if t.name == "y"} def test_serialize_container_environment(): <<<<<<< HEAD storage = prefect.storage.Docker( ======= storage = prefect.environments.storage.Docker( >>>>>>> prefect clone base_image="a", python_dependencies=["b", "c"], registry_url="f" ) deserialized = FlowSchema().load( FlowSchema().dump(Flow(name="test", storage=storage)) ) <<<<<<< HEAD assert isinstance(deserialized.storage, prefect.storage.Docker) ======= assert isinstance(deserialized.storage, prefect.environments.storage.Docker) >>>>>>> prefect clone assert deserialized.storage.registry_url == storage.registry_url def test_deserialize_serialized_flow_after_build(tmpdir): <<<<<<< HEAD flow = Flow(name="test", storage=prefect.storage.Local(tmpdir)) ======= flow = Flow(name="test", storage=prefect.environments.storage.Local(tmpdir)) >>>>>>> prefect clone