def test_simple_two_task_flow_with_final_task_already_running(monkeypatch, executor):
    """Run a two-task flow whose final task run is already ``Running``.

    The mocked backend is seeded with the second task run in a ``Running``
    state at version 1.  After the runner returns, the first task run must
    be successful at version 2, while the flow run and the second task run
    must still be running and the second task run must remain at version 1.
    """
    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t2.set_upstream(t1)

    seeded_flow_runs = [FlowRun(id=flow_run_id)]
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(
            id=task_run_id_2,
            task_slug=flow.slugs[t2],
            version=1,
            flow_run_id=flow_run_id,
            state=Running(),
        ),
    ]
    client = MockedCloudClient(
        flow_runs=seeded_flow_runs,
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    # Flow-level outcome: both the returned state and the stored one.
    assert state.is_running()
    assert client.flow_runs[flow_run_id].state.is_running()

    # Upstream task ran to completion.
    first_run = client.task_runs[task_run_id_1]
    assert first_run.state.is_successful()
    assert first_run.version == 2

    # The already-running task run must be left untouched.
    second_run = client.task_runs[task_run_id_2]
    assert second_run.state.is_running()
    assert second_run.version == 1
def test_simple_three_task_flow_with_one_failing_task(monkeypatch, executor):
    """Run a three-task chain whose last task raises and check every task run.

    The flow is ``t1 -> t2 -> t3`` where ``t3`` divides by zero.  The flow
    run and ``t3`` are expected to end up failed, while ``t1`` and ``t2``
    succeed.  Each task run's version is checked as well.
    """

    @prefect.task
    def error():
        1 / 0

    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())
    task_run_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t3 = error()
        t2.set_upstream(t1)
        t3.set_upstream(t2)

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[
            TaskRun(id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_2, task_id=t2.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_3, task_id=t3.id, flow_run_id=flow_run_id),
        ],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=executor
        )

    assert state.is_failed()
    assert client.flow_runs[flow_run_id].state.is_failed()
    assert client.task_runs[task_run_id_1].state.is_successful()
    assert client.task_runs[task_run_id_1].version == 2
    assert client.task_runs[task_run_id_2].state.is_successful()
    assert client.task_runs[task_run_id_2].version == 2
    assert client.task_runs[task_run_id_3].state.is_failed()
    # This used to re-check task run 2 (copy/paste slip), which left the
    # failing task run's version unverified.
    assert client.task_runs[task_run_id_3].version == 2
def test_flow_runner_heartbeat_sets_command(monkeypatch, setting_available):
    """``_heartbeat`` must return ``True`` and record the heartbeat command.

    ``setting_available`` selects whether the flow settings returned by the
    mocked GraphQL call contain ``heartbeat_enabled=True`` or are empty; the
    expected outcome is the same in both cases.
    """
    client = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    if setting_available:
        flow_settings = dict(heartbeat_enabled=True)
    else:
        flow_settings = {}
    client.graphql.return_value.data.flow_run_by_pk.flow.settings = flow_settings

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    with prefect.context(flow_run_id="foo"):
        res = runner._heartbeat()

    expected_cmd = [
        sys.executable,
        "-m",
        "prefect",
        "heartbeat",
        "flow-run",
        "-i",
        "foo",
    ]
    assert res is True
    assert runner.heartbeat_cmd == expected_cmd
def test_flow_runner_calls_client_the_appropriate_number_of_times(client):
    """Count the client calls made while running an empty flow.

    Two ``get_flow_run_info`` calls and two ``set_flow_run_state`` calls are
    expected, the latter carrying ``Running`` and then ``Success``.
    """
    empty_flow = prefect.Flow(name="test")
    res = CloudFlowRunner(flow=empty_flow).run()

    ## assertions
    # initial state & cancel check
    assert client.get_flow_run_info.call_count == 2
    # Pending -> Running -> Success
    assert client.set_flow_run_state.call_count == 2

    # Collect the ``state`` keyword of every recorded call, in call order.
    states = []
    for recorded_call in client.set_flow_run_state.call_args_list:
        states.append(recorded_call[1]["state"])
    assert states == [Running(), Success(result={})]
def test_task_failure_caches_inputs_automatically(client):
    """A task that fails into ``Retrying`` must carry its inputs as cached inputs.

    The cached input for ``p`` is compared against a ``Result`` built with a
    ``JSONResultHandler``: the two must differ before ``store_safe_value`` is
    called on the expected result and be equal afterwards.  The last state
    sent to the client must be the same kind of ``Retrying`` state.
    """

    @prefect.task(max_retries=2, retry_delay=timedelta(seconds=100))
    def is_p_three(p):
        if p == 3:
            raise ValueError("No thank you.")

    with prefect.Flow("test") as flow:
        param = prefect.Parameter("p")
        res = is_p_three(param)

    runner = CloudFlowRunner(flow=flow)
    state = runner.run(return_tasks=[res], parameters={"p": 3})

    assert state.is_running()
    assert isinstance(state.result[res], Retrying)

    exp_res = Result(3, result_handler=JSONResultHandler())
    # Not equal until the expected result has stored its safe value.
    assert not state.result[res].cached_inputs["p"] == exp_res
    exp_res.store_safe_value()
    assert state.result[res].cached_inputs["p"] == exp_res

    # Keyword arguments of the most recent ``set_task_run_state`` call.
    final_call_kwargs = client.set_task_run_state.call_args_list[-1][-1]
    last_state = final_call_kwargs["state"]
    assert isinstance(last_state, Retrying)
    assert last_state.cached_inputs["p"] == exp_res
def do_mocked_run(self, client, monkeypatch, n_attempts=None, n_queries=None, query_end_state=None):
    """Drive a ``CloudFlowRunner`` against a mocked, initially queued flow run.

    ``FlowRunner.run``, ``client.get_flow_run_info`` and the flow runner
    module's ``time_sleep`` are all replaced with mocks.

    Args:
        client: client mock whose ``get_flow_run_info`` gets replaced.
        monkeypatch: pytest fixture used to patch the flow runner module.
        n_attempts: once the mocked ``FlowRunner.run`` has been called this
            many times it returns ``Success``; ``None`` means it never does.
        n_queries: once the mocked ``get_flow_run_info`` has been called this
            many times it reports ``query_end_state`` instead of ``Queued``;
            ``None`` means it always reports ``Queued``.
        query_end_state: state reported after ``n_queries`` queries.

    Returns:
        Tuple of ``(final state, sleep mock, FlowRunner.run mock)``.
    """
    sleep_mock = MagicMock()

    def fake_flow_run_info(*args, **kwargs):
        queries_so_far = info_mock.call_count
        if n_queries is None or queries_so_far < n_queries:
            reported = Queued()
        else:
            reported = query_end_state
        return MagicMock(version=queries_so_far, state=reported)

    def fake_run(*args, **kwargs):
        # Attempts exhausted: pretend the run finally succeeded.
        if n_attempts is not None and run_mock.call_count >= n_attempts:
            return Success()
        # Called directly (not through the mock) so the query counter of
        # ``info_mock`` is not advanced by this lookup.
        info = fake_flow_run_info()
        if not info.state.is_queued():
            return info.state
        retry_at = pendulum.now("UTC").add(seconds=self.queue_time)
        return Queued(start_time=retry_at)

    run_mock = MagicMock(side_effect=fake_run)
    info_mock = MagicMock(side_effect=fake_flow_run_info)

    client.get_flow_run_info = info_mock
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.FlowRunner.run", run_mock)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.time_sleep", sleep_mock)

    @prefect.task
    def return_one():
        return 1

    with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
        return_one()

    config_overrides = {
        "cloud.check_cancellation_interval": self.check_cancellation_interval
    }
    with set_temporary_config(config_overrides):
        state = CloudFlowRunner(flow=flow).run()

    return state, sleep_mock, run_mock
def test_starting_at_arbitrary_loop_index_from_cloud_context(client):
    """A loop count supplied through the cloud context is seen by the task.

    The mocked flow run info carries ``task_loop_count=20`` in its context.
    The looping task only raises ``LOOP`` while that count is below 20, so
    the expected results are 10 for the looper and 100 downstream.
    """

    @prefect.task
    def looper(x):
        running_total = prefect.context.get("task_loop_result", 0) + x
        if prefect.context.get("task_loop_count", 1) < 20:
            raise LOOP(result=running_total)
        return running_total

    @prefect.task
    def downstream(l):
        return l ** 2

    with prefect.Flow(name="looping", result_handler=JSONResultHandler()) as flow:
        inter = looper(10)
        final = downstream(inter)

    cloud_info = MagicMock(context={"task_loop_count": 20})
    client.get_flow_run_info = MagicMock(return_value=cloud_info)

    flow_state = CloudFlowRunner(flow=flow).run(return_tasks=[inter, final])

    assert flow_state.is_successful()
    assert flow_state.result[inter].result == 10
    assert flow_state.result[final].result == 100
def test_flow_runner_retries_forever_on_queued_state(client, monkeypatch, num_attempts):
    """The runner keeps re-running while ``FlowRunner.run`` returns ``Queued``.

    The mocked ``FlowRunner.run`` yields ``num_attempts - 1`` queued states
    followed by ``Success``.  The final state must be successful, ``run``
    must have been called ``num_attempts`` times, and ``get_flow_run_info``
    one time fewer.
    """
    sleep_mock = MagicMock()
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.time.sleep", sleep_mock)

    queued_states = [
        Queued(start_time=pendulum.now("UTC").add(seconds=offset))
        for offset in range(num_attempts - 1)
    ]
    run_mock = MagicMock(side_effect=queued_states + [Success()])

    info_results = [MagicMock(version=version) for version in range(num_attempts)]
    client.get_flow_run_info = MagicMock(side_effect=info_results)

    # Mock out the actual flow execution
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.FlowRunner.run", run_mock)

    @prefect.task
    def return_one():
        return 1

    with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
        return_one()

    final_state = CloudFlowRunner(flow=flow).run()

    assert final_state.is_successful()
    assert run_mock.call_count == num_attempts
    # Not called on the initial run attempt
    assert client.get_flow_run_info.call_count == num_attempts - 1
def test_scheduled_start_time_is_in_context(monkeypatch, executor):
    """Run ``whats_the_time`` in a one-task flow and check it yields a datetime.

    The flow run, its stored state and the task run must all be successful,
    and the task's result must be a ``datetime.datetime`` instance.
    """
    flow_run_id, task_run_id_1 = (str(uuid.uuid4()) for _ in range(2))

    flow = prefect.Flow(name="test", tasks=[whats_the_time])

    seeded_flow_runs = [FlowRun(id=flow_run_id)]
    seeded_task_runs = [
        TaskRun(
            id=task_run_id_1, task_id=whats_the_time.id, flow_run_id=flow_run_id
        )
    ]
    client = MockedCloudClient(
        flow_runs=seeded_flow_runs,
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    assert state.is_successful()
    assert client.flow_runs[flow_run_id].state.is_successful()
    assert client.task_runs[task_run_id_1].state.is_successful()

    reported_time = state.result[whats_the_time].result
    assert isinstance(reported_time, datetime.datetime)
def test_simple_map(monkeypatch):
    """Map ``plus_one`` over three values and check the resulting task runs.

    The parent task run must end up in a mapped state and the mocked backend
    must hold four task runs that share the mapped task's slug.
    """
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([0, 1, 2])

    # One task run with a known id for the mapped task, plus one for every
    # other task in the flow.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id)
    ]
    for task in flow.tasks:
        if task is t1:
            continue
        seeded_task_runs.append(
            TaskRun(
                id=str(uuid.uuid4()),
                task_slug=flow.slugs[task],
                flow_run_id=flow_run_id,
            )
        )

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=LocalExecutor())

    assert state.is_successful()
    assert client.flow_runs[flow_run_id].state.is_successful()
    assert client.task_runs[task_run_id_1].state.is_mapped()

    # there should be a total of 4 task runs corresponding to the mapped task
    mapped_run_count = sum(
        1 for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1]
    )
    assert mapped_run_count == 4
def test_flow_runner_loads_context_from_cloud(monkeypatch):
    """``initialize_run`` must expose context returned by ``get_flow_run_info``."""
    client = MagicMock()
    client.get_flow_run_info = MagicMock(return_value=MagicMock(context={"a": 1}))
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.initialize_run(
        state=Pending(),
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert res.context["a"] == 1
def test_flow_runner_respects_the_db_state(monkeypatch, state):
    """When no state is passed to ``run``, the state from the client is returned.

    ``state`` is a state class; an instance of it is served by the mocked
    ``get_flow_run_info``.  The runner must query once, never set a state,
    and return a state equal to the served one.
    """
    db_state = state("already", result=10)

    info_mock = MagicMock(return_value=MagicMock(state=db_state))
    set_state_mock = MagicMock()
    client = MagicMock()
    client.get_flow_run_info = info_mock
    client.set_flow_run_state = set_state_mock
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    res = CloudFlowRunner(flow=prefect.Flow(name="test")).run()

    ## assertions
    assert info_mock.call_count == 1  # one time to pull latest state
    assert set_state_mock.call_count == 0  # never needs to update state
    assert res == db_state
def test_flow_runner_raises_endrun_with_correct_state_if_client_cant_retrieve_state(
    monkeypatch,
):
    """If ``get_flow_run_info`` raises, ``run`` returns the state it was given.

    The mocked ``get_flow_run_info`` raises ``SyntaxError``; the runner must
    still have called it and must hand back the very same state object.
    """
    failing_info = MagicMock(side_effect=SyntaxError)
    client = MagicMock()
    client.get_flow_run_info = failing_info
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    ## if ENDRUN is raised, res will be last state seen
    state = Pending("unique message", result=22)
    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.run(state=state)

    assert failing_info.called
    assert res is state
def test_flow_runner_prioritizes_user_context_over_default_context(monkeypatch):
    """A ``today`` key from the flow run info context must appear unchanged.

    The mocked flow run info carries ``{"today": "is a day"}`` as its
    context; after ``initialize_run`` the resulting context must hold exactly
    that value under ``today``.
    """
    cloud_info = MagicMock(context={"today": "is a day"})
    client = MagicMock()
    client.get_flow_run_info = MagicMock(return_value=cloud_info)
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert "today" in res.context
    assert res.context["today"] == "is a day"
def test_flow_runner_initializes_context_from_cloud(monkeypatch):
    """``initialize_run`` merges flow run info into the run context.

    A full ``FlowRunInfoResult`` is served by the mocked client.  The
    resulting context must contain the flow/flow-run/project identifiers and
    the scheduled start time, parameters where explicitly passed values win
    over the served ones, and context keys where the served values win over
    explicitly passed ones.
    """
    from prefect.client.client import FlowRunInfoResult, ProjectInfo

    scheduled_start_time = pendulum.parse("19860920")
    cloud_info = FlowRunInfoResult(
        id="my-flow-run-id",
        name="my-flow-run-name",
        flow_id="my-flow-id",
        version=1,
        task_runs=[],
        state=Pending(),
        scheduled_start_time=scheduled_start_time,
        project=ProjectInfo(id="my-project-id", name="my-project-name"),
        parameters={"p1": 1, "p2": 2},
        context={"c1": 1, "c2": 2},
    )

    client = MagicMock()
    client.get_flow_run_info = MagicMock(return_value=cloud_info)
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.initialize_run(
        state=Pending(),
        task_states={},
        context={"c2": "two", "c3": 3},
        task_contexts={},
        parameters={"p2": "two", "p3": 3},
    )
    ctx = res.context

    assert ctx["flow_id"] == "my-flow-id"
    assert ctx["flow_run_id"] == "my-flow-run-id"
    assert ctx["flow_run_version"] == 1
    assert ctx["flow_run_name"] == "my-flow-run-name"
    assert ctx["scheduled_start_time"] == scheduled_start_time
    assert ctx["project_name"] == "my-project-name"
    assert ctx["project_id"] == "my-project-id"

    # Explicitly provided parameters override those in cloud
    assert ctx["parameters"] == {"p1": 1, "p2": "two", "p3": 3}

    # Explicitly provided context overridden by cloud
    assert ctx["c1"] == 1
    assert ctx["c2"] == 2
    assert ctx["c3"] == 3
def test_flow_runner_prioritizes_kwarg_states_over_db_states(monkeypatch, state):
    """A state passed to ``run`` is used instead of the state held by the client.

    ``state`` is a state class; an instance of it is served by the mocked
    ``get_flow_run_info``.  Because ``run`` receives an explicit ``Pending``
    state, the runner must proceed and set ``Running`` followed by
    ``Success``.
    """
    db_state = state("already", result=10)

    info_mock = MagicMock(return_value=MagicMock(state=db_state))
    set_state_mock = MagicMock()
    client = MagicMock()
    client.get_flow_run_info = info_mock
    client.set_flow_run_state = set_state_mock
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.run(state=Pending("let's do this"))

    ## assertions
    assert info_mock.call_count == 1  # one time to pull latest state
    assert set_state_mock.call_count == 2  # Pending -> Running -> Success

    # ``state`` keyword of each recorded call, in call order.
    states = []
    for recorded_call in set_state_mock.call_args_list:
        states.append(recorded_call[1]["state"])
    assert states == [Running(), Success(result={})]
def test_flow_runner_puts_scheduled_start_time_in_context(monkeypatch):
    """``initialize_run`` must copy ``scheduled_start_time`` into the context.

    The value served by the mocked client is parsed from ``"19860920"``; the
    context entry must be a ``datetime.datetime`` formatting to that date.
    """
    date = pendulum.parse("19860920")
    cloud_info = MagicMock(context={}, scheduled_start_time=date)

    client = MagicMock()
    client.get_flow_run_info = MagicMock(return_value=cloud_info)
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert "scheduled_start_time" in res.context
    start_time = res.context["scheduled_start_time"]
    assert isinstance(start_time, datetime.datetime)
    assert start_time.strftime("%Y-%m-%d") == "1986-09-20"
def test_flow_runner_puts_flow_run_name_in_context(monkeypatch):
    """``initialize_run`` must expose the flow run's name as ``flow_run_name``."""
    # ``name`` cannot be given to the MagicMock constructor (it names the
    # mock itself), so it is assigned as an attribute afterwards.
    # https://docs.python.org/3/library/unittest.mock.html#mock-names-and-the-name-attribute
    cloud_info = MagicMock(context={})
    cloud_info.name = "flow run name"

    client = MagicMock()
    client.get_flow_run_info = MagicMock(return_value=cloud_info)
    client.set_flow_run_state = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert res.context["flow_run_name"] == "flow run name"
def test_check_interrupt_loop_robust_to_api_errors(self, client, monkeypatch):
    """A flow run is still cancelled when some ``get_flow_run_info`` calls fail.

    The client's ``get_flow_run_info`` is replaced with a function that
    raises ``ValueError`` on odd-numbered calls whose immediate caller is a
    function named ``interrupt_if_cancelling``; otherwise it reports
    ``Running`` until the task has set ``trigger`` and ``Cancelling``
    afterwards.  The test then checks that the run ends in ``Cancelled``,
    that at least one error was actually raised, and that the task's
    10-second sleep did not run to completion.
    """
    trigger = threading.Event()
    error_was_raised = False

    # ``_call_count`` is a default argument on purpose: the counter is
    # created once at definition time and shared by every call.
    def get_flow_run_info(*args, _call_count=itertools.count(), **kwargs):
        call_count = next(_call_count)
        import inspect

        # Name of the function that called us directly; this depends on the
        # exact call depth, so no wrapper may be inserted in between.
        caller_name = inspect.currentframe().f_back.f_code.co_name
        if caller_name == "interrupt_if_cancelling" and call_count % 2:
            nonlocal error_was_raised
            error_was_raised = True
            raise ValueError("Woops!")
        state = Cancelling() if trigger.is_set() else Running()
        return MagicMock(version=call_count, state=state)

    client.get_flow_run_info = get_flow_run_info

    ran_longer_than_expected = False

    @prefect.task
    def set_trigger(x):
        trigger.set()
        time.sleep(10)
        # Only reached if the task was not interrupted during the sleep.
        nonlocal ran_longer_than_expected
        ran_longer_than_expected = True
        return x + 1

    with prefect.Flow("test") as flow:
        set_trigger(1)

    with set_temporary_config({"cloud.check_cancellation_interval": 0.1}):
        res = CloudFlowRunner(flow=flow).run()

    assert isinstance(res, Cancelled)
    assert error_was_raised
    assert not ran_longer_than_expected
def test_db_cancelled_states_interrupt_flow_run(client, monkeypatch):
    """A ``Cancelled`` state reported through GraphQL must interrupt the run.

    ``client.graphql`` is replaced with a function that reports ``Running``
    for its first three calls and ``Cancelled`` on every later call.  The
    flow's only task sleeps for 3 seconds; the final state must be
    ``Cancelled`` with a message mentioning an interrupt.
    """
    running_reports = 0

    def heartbeat_counter(*args, **kwargs):
        nonlocal running_reports
        if running_reports != 3:
            running_reports += 1
            reported = "Running"
        else:
            reported = "Cancelled"
        return Box({"data": {"flow_run_by_pk": {"state": reported}}})

    client.graphql = heartbeat_counter

    @prefect.task
    def sleeper():
        time.sleep(3)

    flow = prefect.Flow("test", tasks=[sleeper])
    with set_temporary_config({"cloud.heartbeat_interval": 0.025}):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=[sleeper])

    assert isinstance(state, Cancelled)
    assert "interrupt" in state.message.lower()
def test_task_failure_with_upstream_secrets_doesnt_store_secret_value_and_recompute_if_necessary(
    client,
):
    """Cached inputs coming from a ``Secret`` are re-read from context on retry.

    First run: the secret ``p`` is 3, so the task fails into ``Retrying``.
    Its cached input is compared against a ``Result`` using a
    ``SecretResultHandler``, before and after ``store_safe_value``.

    Second run: the upstream and cached values are replaced with a
    ``SafeResult`` and the secret in context is changed to 4; the task must
    then succeed with result 4.
    """

    @prefect.task(max_retries=2, retry_delay=timedelta(seconds=100))
    def is_p_three(p):
        if p == 3:
            raise ValueError("No thank you.")
        return p

    with prefect.Flow("test", result_handler=JSONResultHandler()) as flow:
        secret = prefect.tasks.secrets.Secret("p")
        res = is_p_three(secret)

    with prefect.context(secrets={"p": 3}):
        state = CloudFlowRunner(flow=flow).run(return_tasks=[res])

    assert state.is_running()
    assert isinstance(state.result[res], Retrying)

    exp_res = Result(3, result_handler=SecretResultHandler(secret))
    # Not equal until the expected result has stored its safe value.
    assert not state.result[res].cached_inputs["p"] == exp_res
    exp_res.store_safe_value()
    assert state.result[res].cached_inputs["p"] == exp_res

    ## here we set the result of the secret to a saferesult, ensuring
    ## it will get converted to a "true" result;
    ## we expect that the upstream value will actually get recomputed from context
    ## through the SecretResultHandler
    safe = SafeResult("p", result_handler=SecretResultHandler(secret))
    state.result[secret] = Success(result=safe)
    state.result[res].start_time = pendulum.now("utc")
    state.result[res].cached_inputs = {"p": safe}

    with prefect.context(secrets={"p": 4}):
        rerun = CloudFlowRunner(flow=flow)
        new_state = rerun.run(return_tasks=[res], task_states=state.result)

    assert new_state.is_successful()
    assert new_state.result[res].result == 4
def test_non_keyed_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Checks that a mapped task with a long retry delay (20 minutes here) is
    retried correctly on a second run when its only upstream is a non-data
    dependency - for mapped tasks, even non-data dependencies can affect the
    number of children spawned.
    """

    @prefect.task
    def return_list():
        return [1, 2, 3]

    @prefect.task(max_retries=1, retry_delay=datetime.timedelta(minutes=20))
    def fail_once():
        if prefect.context.get("task_run_count", 0) < 2:
            raise SyntaxError("bad")
        return 100

    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = fail_once.map(upstream_tasks=[return_list])

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # Known ids for the two named tasks, fresh ids for everything else.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(
            id=task_run_id_2,
            task_slug=flow.slugs[return_list],
            flow_run_id=flow_run_id,
        ),
    ]
    seeded_task_runs += [
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in [t1, return_list]
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    def mapped_runs():
        """All task runs (parent and children) carrying t1's slug."""
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1]
        ]

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_successful()

    # there should be a total of 4 task runs corresponding to each mapped task
    assert len(mapped_runs()) == 4

    # every child of t1 should be retrying
    assert all(
        isinstance(tr.state, Retrying) for tr in mapped_runs() if tr.map_index != -1
    )

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    for tr in client.task_runs.values():
        if tr.task_slug == flow.slugs[t1] and tr.map_index != -1:
            tr.state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert len(mapped_runs()) == 4
    assert all(tr.state.is_successful() for tr in client.task_runs.values())
def test_deep_map_with_a_retry(monkeypatch):
    """
    Builds a three-level mapped flow in which the middle layer hits a
    one-time error. A second run of the flow is expected to resolve it.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on second run

    NOTE(review): another function with this exact name is defined later in
    this file; if both live in the same module the later definition replaces
    this one and this version is never collected - confirm which is intended.
    """
    flow_run_id, task_run_id_1, task_run_id_2, task_run_id_3 = (
        str(uuid.uuid4()) for _ in range(4)
    )

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    # Known ids for the three mapped tasks; any other task uses its own id.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_id=t2.id, flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_3, task_id=t3.id, flow_run_id=flow_run_id),
    ]
    seeded_task_runs += [
        TaskRun(id=task.id, task_id=task.id, flow_run_id=flow_run_id)
        for task in flow.tasks
        if task not in [t1, t2, t3]
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_of(task):
        """All task runs currently stored for ``task``."""
        return [tr for tr in client.task_runs.values() if tr.task_id == task.id]

    def first_child(task):
        """The stored task run of ``task`` with map index 0."""
        return next(tr for tr in runs_of(task) if tr.map_index == 0)

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run()

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in [t1, t2, t3]:
        assert len(runs_of(task)) == 4

    # t2's first child task should be retrying
    assert isinstance(first_child(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child(t3).state.is_pending()

    # RUN A SECOND TIME
    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run()

    # t2's first child task should be successful
    assert first_child(t2).state.is_successful()

    # t3's first child task should be successful
    assert first_child(t3).state.is_successful()
def test_deep_map_with_a_failure(monkeypatch, executor):
    """Run a three-level mapped flow whose middle layer fails for one child.

    The flow run must fail, all three parent task runs must be mapped, each
    mapped task must own four task runs, and the first child of both ``t2``
    and ``t3`` must be failed.

    NOTE(review): the ``executor`` fixture is accepted but not passed to
    ``run`` - confirm whether that is intentional.
    """
    flow_run_id, task_run_id_1, task_run_id_2, task_run_id_3 = (
        str(uuid.uuid4()) for _ in range(4)
    )

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    # Known ids for the three mapped tasks, fresh ids for everything else.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_slug=flow.slugs[t2], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_3, task_slug=flow.slugs[t3], flow_run_id=flow_run_id),
    ]
    seeded_task_runs += [
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in [t1, t2, t3]
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_of(task):
        """All task runs currently stored under ``task``'s slug."""
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[task]
        ]

    def first_child(task):
        """The stored task run of ``task`` with map index 0."""
        return next(tr for tr in runs_of(task) if tr.map_index == 0)

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(return_tasks=flow.tasks)

    assert state.is_failed()
    assert client.flow_runs[flow_run_id].state.is_failed()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in [t1, t2, t3]:
        assert len(runs_of(task)) == 4

    # t2's first child task should have failed
    assert first_child(t2).state.is_failed()

    # t3's first child task should have failed
    assert first_child(t3).state.is_failed()
def test_deep_map_with_a_retry(monkeypatch):
    """
    Builds a three-level mapped flow in which the middle layer hits a
    one-time error. A second run of the flow is expected to resolve it.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on second run
    """
    flow_run_id, task_run_id_1, task_run_id_2, task_run_id_3 = (
        str(uuid.uuid4()) for _ in range(4)
    )

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # Known ids for the three mapped tasks, fresh ids for everything else.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=t1.slug, flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_slug=t2.slug, flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_3, task_slug=t3.slug, flow_run_id=flow_run_id),
    ]
    seeded_task_runs += [
        TaskRun(id=str(uuid.uuid4()), task_slug=task.slug, flow_run_id=flow_run_id)
        for task in flow.tasks
        if task not in [t1, t2, t3]
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_of(task):
        """All task runs currently stored under ``task``'s slug."""
        return [tr for tr in client.task_runs.values() if tr.task_slug == task.slug]

    def first_child(task):
        """The stored task run of ``task`` with map index 0."""
        return next(tr for tr in runs_of(task) if tr.map_index == 0)

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in [t1, t2, t3]:
        assert len(runs_of(task)) == 4

    # t2's first child task should be retrying
    assert isinstance(first_child(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child(t3).state.is_pending()

    # RUN A SECOND TIME with an artificially updated start time
    matching_ids = [
        run_id
        for run_id, tr in client.task_runs.items()
        if tr.task_slug == t2.slug and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    # t2's first child task should be successful
    assert first_child(t2).state.is_successful()

    # t3's first child task should be successful
    assert first_child(t3).state.is_successful()
def test_task_runner_cls_is_cloud_task_runner():
    """A ``CloudFlowRunner`` must use ``CloudTaskRunner`` as its task runner class."""
    flow = prefect.Flow(name="test")
    runner = CloudFlowRunner(flow=flow)
    assert runner.task_runner_cls is CloudTaskRunner
def test_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Checks that a mapped task with a long retry delay (100 minutes here) is
    retried correctly on a second run after the in-memory result values of
    all stored task run states have been cleared.
    """
    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(minutes=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # Known ids for the two mapped tasks, fresh ids for everything else.
    seeded_task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_slug=flow.slugs[t2], flow_run_id=flow_run_id),
    ]
    seeded_task_runs += [
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in [t1, t2]
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=seeded_task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_of(task):
        """All task runs currently stored under ``task``'s slug."""
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[task]
        ]

    def first_child(task):
        """The stored task run of ``task`` with map index 0."""
        return next(tr for tr in runs_of(task) if tr.map_index == 0)

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in [t1, t2]:
        assert len(runs_of(task)) == 4

    # t2's first child task should be retrying
    assert isinstance(first_child(t2).state, Retrying)

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    matching_ids = [
        run_id
        for run_id, tr in client.task_runs.items()
        if tr.task_slug == flow.slugs[t2] and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    # t2's first child task should be successful
    assert first_child(t2).state.is_successful()