def test_validate_missing_reference_tasks():
    """validate() rejects a flow whose reference tasks are no longer in it."""
    flow = Flow(name="test")
    kept, removed = Task(), Task()
    flow.add_task(removed)
    flow.add_task(kept)
    flow.set_reference_tasks([removed])
    # removing the task directly leaves a dangling reference task behind
    flow.tasks.remove(removed)
    with pytest.raises(ValueError) as exc:
        flow.validate()
    assert "reference tasks are not contained" in str(exc.value).lower()
def test_infer_terminal_tasks():
    """Tasks with no downstream edges — including isolated ones — are terminal."""
    with Flow(name="test") as flow:
        a, b, c, d = Task(), Task(), Task(), Task()
        flow.add_edge(a, b)
        flow.add_edge(b, c)
        flow.add_task(d)  # unconnected task counts as terminal too
    assert flow.terminal_tasks() == {c, d}
def test_reference_tasks_are_terminal_tasks_by_default():
    """With no explicit reference tasks, the terminal tasks are used."""
    with Flow(name="test") as flow:
        a, b, c, d = Task(), Task(), Task(), Task()
        flow.add_edge(a, b)
        flow.add_edge(b, c)
        flow.add_task(d)
    assert flow.reference_tasks() == flow.terminal_tasks() == {c, d}
def test_eager_cycle_detection_works():
    """With eager edge validation on, adding a cycle-closing edge raises."""
    with set_temporary_config({"flows.eager_edge_validation": True}):
        flow = Flow(name="test")
        a, b = Task(), Task()
        flow.add_edge(a, b)
        # this edge closes a cycle and is rejected immediately
        with pytest.raises(ValueError):
            flow.add_edge(b, a)

    # the temporary config is restored once the context exits
    assert not prefect.config.flows.eager_edge_validation
def test_eager_cycle_detection_defaults_false():
    """By default cycles are only caught at validate(), not at add_edge()."""
    assert not prefect.config.flows.eager_edge_validation
    flow = Flow(name="test")
    a, b = Task(), Task()
    flow.add_edge(a, b)

    # no cycle detected at edge-creation time
    assert flow.add_edge(b, a)

    # ...but validation surfaces it
    with pytest.raises(ValueError):
        flow.validate()
def test_cache_all_downstream_edges(self):
    """The downstream-edges cache is read back and invalidated on changes."""
    flow = Flow(name="test")
    a, b, c = Task(), Task(), Task()
    flow.add_edge(a, b)
    flow.all_downstream_edges()  # prime the cache

    # a planted cache entry is returned verbatim, proving the cache is read
    key = ("all_downstream_edges", ())
    flow._cache[key] = 1
    assert flow.all_downstream_edges() == 1

    # modifying the graph must invalidate the cached value
    flow.add_edge(b, c)
    assert flow.all_downstream_edges() != 1
def test_sorted_tasks():
    """
    t1 -> t2 -> t3 -> t4
    """
    flow = Flow(name="test")
    chain = [Task("1"), Task("2"), Task("3"), Task("4")]
    for upstream, downstream in zip(chain, chain[1:]):
        flow.add_edge(upstream, downstream)
    # a linear chain has exactly one topological order
    assert flow.sorted_tasks() == tuple(chain)
def test_set_reference_tasks():
    """Reference tasks can be set explicitly; an empty set means terminal tasks."""
    with Flow(name="test") as flow:
        a, b, c = Task(), Task(), Task()
        flow.add_edge(a, b)
        flow.add_edge(b, c)

        # clearing the reference tasks falls back to terminal tasks
        flow.set_reference_tasks([])
        assert flow.reference_tasks() == flow.terminal_tasks()

        flow.set_reference_tasks([b])
        assert flow.reference_tasks() == {b}
def test_merge():
    """update() merges the other flow's tasks and edges into the receiver."""
    source = Flow(name="test")
    target = Flow(name="test")
    a, b, c = Task(), Task(), Task()
    source.add_edge(a, b)
    target.add_edge(b, c)

    target.update(source)
    assert target.tasks == {a, b, c}
    assert len(target.edges) == 2
def test_context_manager_is_properly_applied_to_tasks():
    """bind() attaches a task to the innermost active flow context only."""
    outer_task, inner_task, orphan = Task(), Task(), Task()
    with Flow(name="test") as outer:
        with Flow(name="test") as inner:
            inner_task.bind()
        outer_task.bind()

    # binding with no active flow context is an error
    with pytest.raises(ValueError):
        orphan.bind()

    assert outer.tasks == {outer_task}
    assert inner.tasks == {inner_task}
def test_sorted_tasks_with_invalid_start_task():
    """
    t1 -> t2 -> t3 -> t4
    t3 -> t5
    """
    flow = Flow(name="test")
    a, b, stray = Task("1"), Task("2"), Task("3")
    flow.add_edge(a, b)
    # `stray` was never added to the flow, so it is an invalid root
    with pytest.raises(ValueError) as exc:
        flow.sorted_tasks(root_tasks=[stray])
    assert "not found in Flow" in str(exc.value)
def test_works_with_multiple_upstream_states(self):
    """The input mapping gathers each keyed upstream state's result."""
    downstream = Task(name="downstream_task")
    first_upstream = Task(name="upstream_task_one")
    second_upstream = Task(name="upstream_task_two")
    upstream_states = {
        Edge(first_upstream, downstream, key="var_1"): State(result=1),
        Edge(second_upstream, downstream, key="var_2"): State(result=2),
    }
    mapping = dsh._create_input_mapping(upstream_states)
    assert mapping == {"var_1": 1, "var_2": 2}
def test_equality_based_on_edges(self):
    """Flows with identical edge sets compare equal; an extra edge breaks it."""
    left = Flow(name="test")
    right = Flow(name="test")
    t1, t2, t3 = Task(), Task(), Task()
    for flow in (left, right):
        flow.add_edge(t1, t2)
        flow.add_edge(t1, t3)

    assert left == right
    right.add_edge(t2, t3)
    assert left != right
def test_cache_survives_pickling(self):
    """A planted cache entry round-trips through cloudpickle and is still
    invalidated by later graph changes."""
    flow = Flow(name="test")
    a, b, c = Task(), Task(), Task()
    flow.add_edge(a, b)
    flow.sorted_tasks()  # prime the cache

    key = ("_sorted_tasks", (("root_tasks", ()),))
    flow._cache[key] = 1
    assert flow.sorted_tasks() == 1

    restored = cloudpickle.loads(cloudpickle.dumps(flow))
    # the cache came through the pickle round-trip
    assert restored.sorted_tasks() == 1
    # ...and a new edge still invalidates it
    restored.add_edge(b, c)
    assert restored.sorted_tasks() != 1
def test_get_tasks_can_check_types(self):
    """get_tasks(task_type=...) filters by task subclass."""

    class Specific(Task):
        pass

    generic = Task(name="t1", tags=["a", "b"])
    special = Specific(name="t1", tags=["a"])
    flow = Flow(name="test", tasks=[generic, special])
    assert flow.get_tasks(task_type=Specific) == [special]
def test_create_flow_with_edges(self):
    """Passing edges to the constructor registers their tasks automatically."""
    flow = Flow(
        name="test",
        edges=[Edge(upstream_task=Task(), downstream_task=AddTask(), key="x")],
    )
    assert len(flow.edges) == 1
    assert len(flow.tasks) == 2
def test_viz_renders_if_ipython_isnt_installed_or_errors(self, error):
    """visualize() must not blow up when IPython is missing or broken."""
    graphviz = MagicMock()
    ipython = MagicMock(get_ipython=MagicMock(side_effect=error))
    with patch.dict("sys.modules", graphviz=graphviz, IPython=ipython):
        with Flow(name="test") as flow:
            AddTask(name="a_nice_task").map(x=Task(name="a_list_task"), y=8)
        # simply rendering without raising is the assertion here
        flow.visualize()
def test_replace_replaces_all_the_things(self):
    """replace() swaps a task out of the task set, edges, and reference tasks."""
    with Flow(name="test") as flow:
        original = Task(name="t1")()
        dependent = Task(name="t2")(upstream_tasks=[original])
        replacement = Task(name="t3")
        flow.set_reference_tasks([original])
        flow.replace(original, replacement)

        assert flow.tasks == {dependent, replacement}
        assert {edge.upstream_task for edge in flow.edges} == {replacement}
        assert {edge.downstream_task for edge in flow.edges} == {dependent}
        assert flow.reference_tasks() == {replacement}
        assert flow.terminal_tasks() == {dependent}
        # the replaced task is gone entirely — even edge lookups reject it
        with pytest.raises(ValueError):
            flow.edges_to(original)
def test_setting_reference_tasks_clears_cache(self):
    """set_reference_tasks() wipes the flow's internal cache."""
    flow = Flow(name="test")
    task = Task()
    flow.add_task(task)
    flow._cache[1] = 2  # plant a dummy entry
    flow.set_reference_tasks([task])
    assert 1 not in flow._cache
def test_does_not_look_for_file_when_no_result_handler_given(self):
    """Without a result handler the checkpoint handler skips any file lookup."""
    task = Task(name="Task")
    runner = DSTaskRunner(task)
    # runner deliberately has no upstream_states: if a result handler had
    # been supplied this would normally raise, so completing without error
    # proves no handler lookup was attempted
    old_state = Pending()
    new_state = Running()
    dsh.checkpoint_handler(runner, old_state, new_state)
def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
    """Chained mapped tasks render one dashed edge per mapped child.

    BUG FIX: the two final assertions previously asserted a bare
    ``"...".format(...)`` expression — a non-empty string, which is always
    truthy — so the test could never fail. The intended check (as in the
    sibling single-mapping test) is membership in ``graph.source``.
    """
    ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    add = AddTask(name="a_nice_task")
    list_task = Task(name="a_list_task")
    map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
    map_state2 = Mapped(map_states=[Success(), Failed()])
    with patch.dict("sys.modules", IPython=ipython):
        with Flow(name="test") as f:
            first_res = add.map(x=list_task, y=8)
            with pytest.warns(
                UserWarning
            ):  # making a copy of a task with dependencies
                res = first_res.map(x=first_res, y=9)
        graph = f.visualize(
            flow_state=Success(
                result={
                    res: map_state1,
                    list_task: Success(),
                    first_res: map_state2,
                }
            )
        )
    # each mapped child of the first map feeds the matching child of the second
    for index in ("0", "1"):
        assert "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + index, second=str(id(res)) + index
        ) in graph.source
def test_viz_reflects_mapping_if_flow_state_provided(self):
    """Mapped tasks render per-child colored nodes and dashed input edges."""
    ipython = MagicMock(
        get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
    )
    add = AddTask(name="a_nice_task")
    list_task = Task(name="a_list_task")
    map_state = Mapped(map_states=[Success(), Failed()])
    with patch.dict("sys.modules", IPython=ipython):
        with Flow(name="test") as flow:
            res = add.map(x=list_task, y=8)
        graph = flow.visualize(
            flow_state=Success(result={res: map_state, list_task: Success()})
        )

        # one colored node for each mapped result
        assert 'label="a_nice_task <map>" color="#00800080"' in graph.source
        assert 'label="a_nice_task <map>" color="#FF000080"' in graph.source
        assert 'label=a_list_task color="#00800080"' in graph.source
        assert 'label=8 color="#00000080"' in graph.source

        # two edges for each input to add()
        for var in ["x", "y"]:
            for index in [0, 1]:
                assert "{0} [label={1} style=dashed]".format(
                    index, var
                ) in graph.source
def test_equality_based_on_reference_tasks(self):
    """Flow equality takes the reference-task sets into account."""
    left = Flow(name="test")
    right = Flow(name="test")
    t1, t2, t3 = Task(), Task(), Task()
    for flow in (left, right):
        flow.add_edge(t1, t2)
        flow.add_edge(t1, t3)

    left.set_reference_tasks([t2])
    assert left != right
    right.set_reference_tasks([t2])
    assert left == right
def test_copy():
    """copy() yields an equal flow that is independent of later mutations."""
    with Flow(name="test") as flow:
        a, b, c = Task(), Task(), Task()
        flow.add_edge(a, b)
        flow.add_edge(b, c)
        flow.set_reference_tasks([a])

    duplicate = flow.copy()
    assert duplicate == flow

    # mutating the original must not affect the copy
    flow.add_edge(Task(), Task())
    assert len(duplicate.tasks) == len(flow.tasks) - 2
    assert len(duplicate.edges) == len(flow.edges) - 1
    assert flow.reference_tasks() == duplicate.reference_tasks() == {a}
def test_raises_appropriate_error_when_incompatible_handler_given(self):
    """An incompatible result handler makes the checkpoint handler raise."""
    task = Task(name="Task", result_handler=LocalResultHandler())
    runner = DSTaskRunner(task)
    runner.upstream_states = {}
    old_state = Pending()
    new_state = Running()
    with pytest.raises(TypeError):
        dsh.checkpoint_handler(runner, old_state, new_state)
def test_sorted_tasks_with_start_task():
    """
    t1 -> t2 -> t3 -> t4
    t3 -> t5
    """
    flow = Flow(name="test")
    t1, t2, t3, t4, t5 = (Task(str(i)) for i in range(1, 6))
    flow.add_edge(t1, t2)
    flow.add_edge(t2, t3)
    flow.add_edge(t3, t4)
    flow.add_edge(t3, t5)

    # an empty root list sorts the entire flow
    assert set(flow.sorted_tasks(root_tasks=[])) == {t1, t2, t3, t4, t5}
    # a root task restricts the sort to its downstream subgraph
    assert set(flow.sorted_tasks(root_tasks=[t3])) == {t3, t4, t5}
def test_set_dependencies_adds_all_arguments_to_flow():
    """Every task mentioned in set_dependencies() joins the flow."""
    flow = Flow(name="test")

    class ArgTask(Task):
        def run(self, x):
            return x

    main = ArgTask()
    upstream, downstream, keyword = Task(), Task(), Task()
    flow.set_dependencies(
        task=main,
        upstream_tasks=[upstream],
        downstream_tasks=[downstream],
        keyword_tasks={"x": keyword},
    )
    assert flow.tasks == {main, upstream, downstream, keyword}
def test_sorted_tasks_with_ambiguous_sort():
    """
    t1 -> bottleneck
    t2 -> bottleneck
    t3 -> bottleneck
    bottleneck -> t4
    bottleneck -> t5
    bottleneck -> t6
    """
    flow = Flow(name="test")
    sources = [Task("1"), Task("2"), Task("3")]
    sinks = [Task("4"), Task("5"), Task("6")]
    bottleneck = Task("bottleneck")
    for task in sources:
        flow.add_edge(task, bottleneck)
    for task in sinks:
        flow.add_edge(bottleneck, task)

    order = flow.sorted_tasks()
    # the three sources come first (in any order), then the bottleneck,
    # then the three sinks (again in any order)
    assert set(order[:3]) == set(sources)
    assert order[3] is bottleneck
    assert set(order[4:]) == set(sinks)
def test_cache_sorted_tasks(self):
    """sorted_tasks() populates, reads, and invalidates the flow cache."""
    flow = Flow(name="test")
    a, b, c = Task(), Task(), Task()
    flow.add_edge(a, b)
    flow.sorted_tasks()

    # the cache holds the computed result
    key = ("_sorted_tasks", (("root_tasks", ()),))
    assert flow._cache[key] == (a, b)

    # the cache is read back verbatim
    flow._cache[key] = 1
    assert flow.sorted_tasks() == 1

    # a new edge invalidates the stale entry
    flow.add_edge(b, c)
    assert flow.sorted_tasks() == (a, b, c)
def test_errors_when_regular_runner_is_used(self):
    """The checkpoint handler rejects a plain TaskRunner."""
    task = Task(
        name="Task", result_handler=PandasResultHandler("dummy.csv", "csv")
    )
    runner = TaskRunner(task)  # not a DSTaskRunner
    old_state = Pending()
    new_state = Running()
    with pytest.raises(TypeError):
        dsh.checkpoint_handler(runner, old_state, new_state)