# NOTE: the imports were stripped from this excerpt; the block below is a
# best-effort reconstruction assuming the Prefect 1.x package layout.
import datetime
import time
from unittest.mock import MagicMock

import pendulum
import pytest

import prefect
from prefect import Flow, Parameter, Task, tags
from prefect.engine.flow_runner import FlowRunner, FlowRunnerInitializeResult
from prefect.engine.result import NoResult, Result
from prefect.engine.signals import ENDRUN, FAIL
from prefect.engine.state import (
    Failed, Mapped, Paused, Pending, Resume, Retrying,
    Running, Scheduled, Success, TriggerFailed,
)
from prefect.executors import LocalExecutor
from prefect.tasks.secrets import PrefectSecret
from prefect.triggers import manual_only
from prefect.utilities import tasks
from prefect.utilities.debug import raise_on_exception


def test_flow_runner_runs_flow_with_2_dependent_tasks_and_first_task_fails_and_second_has_trigger():
    flow = Flow(name="test")
    task1 = ErrorTask()
    task2 = SuccessTask(trigger=prefect.triggers.all_failed)
    flow.add_edge(task1, task2)

    flow_state = FlowRunner(flow=flow).run(return_tasks=[task1, task2])
    assert isinstance(flow_state, Success)  # flow state is determined by terminal states
    assert isinstance(flow_state.result[task1], Failed)
    assert isinstance(flow_state.result[task2], Success)
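

# The helper tasks below are not defined in this excerpt. These are minimal
# sketches reconstructed from how the tests use them (assumptions, not the
# suite's exact helpers): SuccessTask returns 1, ErrorTask raises a plain
# exception (producing a Failed state), and RaiseFailTask raises an explicit
# FAIL signal, which also produces Failed but never TriggerFailed.
class SuccessTask(Task):
    def run(self):
        return 1


class ErrorTask(Task):
    def run(self):
        raise ValueError("custom-error-message")


class RaiseFailTask(Task):
    def run(self):
        raise FAIL("custom-fail-message")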


def test_flow_runner_runs_basic_flow_with_2_dependent_tasks_and_first_task_fails_with_FAIL():
    flow = Flow(name="test")
    task1 = RaiseFailTask()
    task2 = SuccessTask()
    flow.add_edge(task1, task2)

    flow_state = FlowRunner(flow=flow).run(return_tasks=[task1, task2])
    assert isinstance(flow_state, Failed)
    assert isinstance(flow_state.result[task1], Failed)
    assert not isinstance(flow_state.result[task1], TriggerFailed)
    assert isinstance(flow_state.result[task2], TriggerFailed)


def test_all_pipeline_method_steps_are_called():
    pipeline = [
        "initialize_run",
        "check_flow_is_pending_or_running",
        "set_flow_to_running",
        "get_flow_run_state",
    ]

    runner = FlowRunner(Flow(name="test"))

    for method in pipeline:
        setattr(runner, method, MagicMock())

    # initialize_run's return value is unpacked, which a bare MagicMock doesn't support
    runner.initialize_run = MagicMock(
        return_value=FlowRunnerInitializeResult(
            MagicMock(), MagicMock(), MagicMock(), MagicMock()
        )
    )

    runner.run()

    for method in pipeline:
        assert getattr(runner, method).call_count == 1
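

# Why initialize_run can't stay a bare MagicMock: FlowRunner.run unpacks its
# return value into four fields, roughly
#     state, task_states, context, task_contexts = self.initialize_run(...)
# (a sketch of the pattern, not Prefect's verbatim source), so the mocked
# return value must be a four-field, tuple-like FlowRunnerInitializeResult.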


def test_parameters_are_placed_into_context_including_defaults():
    @prefect.task
    def whats_in_ctx():
        return prefect.context.parameters

    y = prefect.Parameter("y", default=99)
    z = prefect.Parameter("z", default=19)
    flow = Flow(name="test", tasks=[y, z, whats_in_ctx])

    flow_state = FlowRunner(flow=flow).run(
        return_tasks=[whats_in_ctx], parameters=dict(y=42)
    )
    assert isinstance(flow_state, Success)
    # the passed value overrides y's default, while z falls back to its default
    assert flow_state.result[whats_in_ctx].result == dict(y=42, z=19)


# NOTE: tests that take `self` are methods of test classes in the original
# suite; the class definitions were lost in this excerpt.
def test_initialize_sets_task_contexts(self):
    t1 = Task(name="t1")
    t2 = Parameter(name="x")
    flow = Flow(name="test", tasks=[t1, t2])

    result = FlowRunner(flow).initialize_run(
        state=Pending(), task_states={}, context={}, task_contexts={}, parameters={}
    )
    assert result.task_contexts == {
        t: dict(task_name=t.name, task_slug=flow.slugs[t]) for t in flow.tasks
    }


def test_manual_only_trigger_caches_inputs(self, executor):
    with Flow(name="test") as f:
        x = Parameter("x")
        inp = SuccessTask()
        t = AddTask(trigger=manual_only)
        res = t(x, inp)

    # the manual_only trigger pauses the run and caches the task's inputs
    first_state = FlowRunner(flow=f).run(
        executor=executor, parameters=dict(x=11), return_tasks=f.tasks
    )
    assert first_state.is_running()

    first_state.result.update(
        {res: Resume(cached_inputs=first_state.result[res].cached_inputs)}
    )
    second_state = FlowRunner(flow=f).run(
        executor=executor,
        parameters=dict(x=1),
        return_tasks=[res],
        task_states=first_state.result,
    )
    assert isinstance(second_state, Success)
    # the resumed run uses the cached x=11, not the new x=1: 11 + 1 == 12
    assert second_state.result[res].result == 12
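

# `AddTask` is not defined in this excerpt; a minimal sketch consistent with
# its use above (returns the sum of its two inputs). The `executor` argument
# seen in several tests is a pytest fixture assumed to supply an executor
# instance such as LocalExecutor().
class AddTask(Task):
    def run(self, x, y):
        return x + y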


def test_retries_ignore_cached_inputs_if_upstream_results_are_available(
    self, executor
):
    with Flow(name="test") as f:
        a = CountTask()
        b = ReturnTask(max_retries=1, retry_delay=datetime.timedelta(0))
        a_res = a()
        b_res = b(a_res)

    first_state = FlowRunner(flow=f).run(executor=executor, return_tasks=f.tasks)
    assert first_state.is_running()

    a_state = first_state.result[a_res]
    a_state.result = 100  # modify the upstream result
    b_state = first_state.result[b_res]
    b_state.cached_inputs = dict(x=Result(2))  # artificially alter state

    with raise_on_exception():  # without caching we'd expect a KeyError
        second_state = FlowRunner(flow=f).run(
            executor=executor, return_tasks=[b_res], task_states=first_state.result
        )
    assert isinstance(second_state, Success)
    # the retry reads the live upstream result (100), not the cached input (2)
    assert second_state.result[b_res].result == 1 / 99


def test_retries_use_cached_inputs(self, executor):
    with Flow(name="test") as f:
        a = CountTask()
        b = ReturnTask(max_retries=1, retry_delay=datetime.timedelta(0))
        a_res = a()
        b_res = b(a_res)

    first_state = FlowRunner(flow=f).run(executor=executor, return_tasks=f.tasks)
    assert first_state.is_running()

    a_state = first_state.result[a_res]
    a_state.result = NoResult  # remove the result to see if the cached inputs are picked up
    b_state = first_state.result[b_res]
    b_state.cached_inputs = dict(x=Result(2))  # artificially alter state

    with raise_on_exception():  # without caching we'd expect a KeyError
        second_state = FlowRunner(flow=f).run(
            executor=executor, return_tasks=[b_res], task_states=first_state.result
        )
    assert isinstance(second_state, Success)
    assert second_state.result[b_res].result == 1
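

# `CountTask` and `ReturnTask` are likewise undefined in this excerpt; the
# sketches below are reconstructions consistent with the two retry tests above,
# not necessarily the suite's exact helpers. CountTask returns how many times
# it has run (1 on the first call); ReturnTask computes 1 / (x - 1), so x == 1
# raises ZeroDivisionError (triggering the retry), x == 2 yields 1, and
# x == 100 yields 1 / 99.
class CountTask(Task):
    call_count = 0

    def run(self):
        self.call_count += 1
        return self.call_count


class ReturnTask(Task):
    def run(self, x):
        return 1 / (x - 1)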


def test_determine_final_state_preserves_running_states_when_tasks_still_running(
    self,
):
    task = Task()
    flow = Flow(name="test", tasks=[task])
    old_state = Running()
    new_state = FlowRunner(flow=flow).get_flow_run_state(
        state=old_state,
        task_states={task: Retrying(start_time=pendulum.now("utc").add(days=1))},
        task_contexts={},
        return_tasks=set(),
        task_runner_state_handlers=[],
        executor=LocalExecutor(),
    )
    assert new_state is old_state


def test_parameter_precedence(self):
    x = Parameter(name="x")
    flow = Flow(name="test", tasks=[x])
    result = FlowRunner(flow).initialize_run(
        state=Pending(),
        task_states={},
        context={"parameters": {"x": 2, "y": 1}},
        task_contexts={},
        parameters={"x": 1},
    )
    # explicitly passed parameters win over values already in context
    assert result.context["parameters"] == {"x": 1, "y": 1}


def test_pause_task_doesnt_pause_sometimes(self):
    class OneTask(Task):
        def run(self):
            tasks.pause_task()
            return 1

    class AddTask(Task):
        def run(self, x, y):
            if x == y:
                tasks.pause_task()
            return x + y

    with Flow(name="test") as f:
        t1 = AddTask()(1, 1)
        t2 = OneTask()(upstream_tasks=[t1])

    # t1 starts from a Resume state, so its pause_task() call is a no-op;
    # t2 has no Resume state, so it pauses
    res = FlowRunner(flow=f).run(task_states={t1: Resume()}, return_tasks=[t1, t2])
    assert res.result[t1].is_successful()
    assert isinstance(res.result[t2], Paused)


def test_secrets_dynamically_pull_from_context():
    flow = Flow(name="test")
    task1 = PrefectSecret("foo", max_retries=1, retry_delay=datetime.timedelta(0))
    flow.add_task(task1)

    # the secret isn't in context yet, so the task fails and enters Retrying
    flow_state = FlowRunner(flow=flow).run(return_tasks=[task1])
    assert flow_state.is_running()
    assert flow_state.result[task1].is_retrying()

    with prefect.context(secrets=dict(foo=42)):
        time.sleep(1)  # let the retry's scheduled start time pass
        flow_state = FlowRunner(flow=flow).run(task_states=flow_state.result)

    assert flow_state.is_successful()


# NOTE: tests that take a `state` argument are parametrized via
# @pytest.mark.parametrize in the original suite; the decorators were lost
# in this excerpt.
def test_not_pending_or_running_raise_endrun(self, state):
    flow = Flow(name="test", tasks=[Task()])
    with pytest.raises(ENDRUN):
        FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)


def test_flow_runner_has_logger():
    r = FlowRunner(Flow(name="test"))
    assert r.logger.name == "prefect.FlowRunner"


def test_flow_runner_handlers_are_called(self):
    FlowRunner(flow=Flow(name="test"), state_handlers=[flow_runner_handler]).run()
    # the flow changed state twice: Pending -> Running -> Success
    assert handler_results["FlowRunner"] == 2


def test_multiple_flow_handlers_are_called(self):
    flow = Flow(name="test", state_handlers=[flow_handler, flow_handler])
    FlowRunner(flow=flow).run()
    # the flow changed state twice (Pending -> Running -> Success) and the
    # same handler is registered twice: 2 changes x 2 handlers == 4 calls
    assert handler_results["Flow"] == 4
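

# `flow_handler`, `flow_runner_handler`, and `handler_results` are fixtures
# defined elsewhere in the suite. A sketch consistent with the assertions in
# the two handler tests above, assuming Prefect's standard state-handler
# signature handler(obj, old_state, new_state) and that handler_results is
# reset between tests:
import collections

handler_results = collections.Counter()


def flow_handler(flow, old_state, new_state):
    handler_results["Flow"] += 1
    return new_state


def flow_runner_handler(flow_runner, old_state, new_state):
    handler_results["FlowRunner"] += 1
    return new_state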


def test_flow_on_failure_is_not_called(self):
    on_failure = MagicMock()
    flow = Flow(name="test", on_failure=on_failure, tasks=[Task()])
    FlowRunner(flow=flow).run()
    assert not on_failure.called


def test_parameters_can_be_set_in_context_if_none_passed():
    x = prefect.Parameter("x")
    f = FlowRunner(Flow(name="test", tasks=[x]))
    state = f.run(parameters={}, context={"parameters": {"x": 5}}, return_tasks=[x])
    assert state.result[x].result == 5


def test_running_stays_running(self):
    state = Running()
    flow = Flow(name="test", tasks=[Task()])
    new_state = FlowRunner(flow=flow).set_flow_to_running(state=state)
    assert new_state.is_running()


def test_other_states_raise_endrun(self, state):
    flow = Flow(name="test", tasks=[Task()])
    with pytest.raises(ENDRUN):
        FlowRunner(flow=flow).set_flow_to_running(state=state)


def test_scheduled_states_with_past_start_time(self):
    state = Scheduled(start_time=pendulum.now("utc") - datetime.timedelta(minutes=1))
    assert (
        FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(state=state)
        is state
    )


def test_pending_becomes_running(self, state):
    flow = Flow(name="test", tasks=[Task()])
    new_state = FlowRunner(flow=flow).set_flow_to_running(state=state)
    assert new_state.is_running()


def test_scheduled_states_without_start_time(self):
    state = Scheduled(start_time=None)
    assert (
        FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(state=state)
        is state
    )


def test_initialize_sets_none_to_pending(self):
    result = FlowRunner(Flow(name="test")).initialize_run(
        state=None, task_states={}, context={}, task_contexts={}, parameters={}
    )
    assert result.state.is_pending()


# Type-instruction categories used below:
#   0 and 3 - Lexical Matching
#   1 - Spatial (relative to screen)
#   2 - Spatial (relative to other elements)
# The prepare_rico_* and layout_lm_* tasks, the *_path variables, pair_evaluation,
# and INSTRUCTION_TYPE are project-specific and defined outside this excerpt.
with Flow("Running the Transformers for Pair Classification") as flow1:
    with tags("train"):
        train_input = prepare_rico_task(train_path, type_instructions=INSTRUCTION_TYPE)
        train_dataset = prepare_rico_layout_lm_task(train_input["data"])
    with tags("dev"):
        dev_input = prepare_rico_task(dev_path, type_instructions=INSTRUCTION_TYPE)
        dev_dataset = prepare_rico_layout_lm_task(dev_input["data"])
    with tags("test"):
        test_input = prepare_rico_task(test_path, type_instructions=INSTRUCTION_TYPE)
        test_dataset = prepare_rico_layout_lm_task(test_input["data"])

    layout_lm_trainer_task(
        train_dataset=train_dataset,
        dev_dataset=dev_dataset,
        test_dataset=test_dataset,
        mapping_dev=dev_input["mapping"],
        mapping_test=test_input["mapping"],
        task_name="layout_lm_pair_rico",
        output_dir="./cache/layout_lm_pair_rico/",
        mode="train",
        eval_fn=pair_evaluation,
    )

FlowRunner(flow=flow1).run()


def test_pending_or_running_are_ok(self, state):
    flow = Flow(name="test", tasks=[Task()])
    new_state = FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)
    assert new_state is state


def test_initialize_returns_state_if_provided(self, state):
    result = FlowRunner(Flow(name="test")).initialize_run(
        state=state, task_states={}, context={}, task_contexts={}, parameters={}
    )
    assert result.state is state


def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    This test makes sure that mapped tasks properly generate children tasks
    even when run multiple times and without available upstream results.

    In this test, we run the pipeline from a variety of starting points,
    ensuring that some upstream results are unavailable and checking that
    children pipelines are properly regenerated.
    """

    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as f:
        n = numbers()
        x = plus_one.map(n)
        y = plus_one.map(x)
        s = get_sum(y)

    # first run with a missing result from `n` but map_states for `x`
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(
                map_states=[
                    Pending(cached_inputs={"x": Result(i)}) for i in range(1, 4)
                ]
            ),
        },
        return_tasks=f.tasks,
    )
    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n and x
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Retrying(cached_inputs={"x": Result(4)}),
                ]
            ),
        },
        return_tasks=f.tasks,
    )
    assert state.is_successful()
    assert state.result[s].result == 12

    # next run with missing results for n, x, and y
    state = FlowRunner(flow=f).run(
        executor=executor,
        task_states={
            n: Success(),
            x: Mapped(map_states=[Success(), Success(), Success()]),
            y: Mapped(
                map_states=[Success(result=3), Success(result=4), Success(result=5)]
            ),
        },
        return_tasks=f.tasks,
    )
    assert state.is_successful()
    assert state.result[s].result == 12


def test_non_scheduled_states(self, state):
    assert (
        FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(state=state)
        is state
    )