def test_starting_at_arbitrary_loop_index_from_cloud_context(client):
    """Check that a loop count reported by the cloud context is honoured.

    The mocked ``client.get_flow_run_info`` returns a context containing
    ``task_loop_count=20``. The test expects ``looper`` to finish with a
    result of 10 and the downstream task to square it to 100.
    """

    @prefect.task
    def looper(x):
        # Both branches produce the same accumulated value; only whether it is
        # returned or handed to LOOP differs.
        accumulated = prefect.context.get("task_loop_result", 0) + x
        if prefect.context.get("task_loop_count", 1) < 20:
            raise LOOP(result=accumulated)
        return accumulated

    @prefect.task
    def downstream(l):
        return l ** 2

    with prefect.Flow(name="looping", result=PrefectResult()) as f:
        inter = looper(10)
        final = downstream(inter)

    # The cloud-side flow run info carries the loop index to start from.
    cloud_info = MagicMock(context={"task_loop_count": 20})
    client.get_flow_run_info = MagicMock(return_value=cloud_info)

    runner = CloudFlowRunner(flow=f)
    flow_state = runner.run(return_tasks=[inter, final])

    assert flow_state.is_successful()
    for task, expected in ((inter, 10), (final, 100)):
        assert flow_state.result[task].result == expected
def test_simple_two_task_flow_with_final_task_already_running(
    monkeypatch, executor
):
    """Run a two-task flow whose downstream task run is already Running.

    The second task run is registered with ``state=Running()`` and
    ``version=1``. After the flow run, the test expects the flow to still be
    running, the first task run to be successful at version 2, and the second
    task run to be unchanged (still running, still version 1).
    """
    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t2.set_upstream(t1)

    upstream_run = TaskRun(
        id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id
    )
    already_running = TaskRun(
        id=task_run_id_2,
        task_id=t2.id,
        version=1,
        flow_run_id=flow_run_id,
        state=Running(),
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[upstream_run, already_running],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    assert state.is_running()
    assert client.flow_runs[flow_run_id].state.is_running()

    first_run = client.task_runs[task_run_id_1]
    assert first_run.state.is_successful()
    assert first_run.version == 2

    second_run = client.task_runs[task_run_id_2]
    assert second_run.state.is_running()
    assert second_run.version == 1
def test_flow_runner_retries_forever_on_queued_state(client, monkeypatch, num_attempts):
    """Check that CloudFlowRunner keeps re-running while the flow is Queued.

    ``FlowRunner.run`` is replaced by a mock that yields ``num_attempts - 1``
    ``Queued`` states followed by a single ``Success``. The test expects
    ``CloudFlowRunner.run`` to end in the successful state, to have invoked
    the underlying run ``num_attempts`` times, and to have fetched flow run
    info once per retry (``num_attempts - 1`` times).

    Args:
        client: fixture whose ``get_flow_run_info`` attribute is replaced here.
        monkeypatch: pytest fixture used to patch ``time.sleep`` and
            ``FlowRunner.run`` inside ``prefect.engine.cloud.flow_runner``.
        num_attempts: total number of run attempts to simulate; supplied from
            outside this function (presumably by parametrization).
    """
    # time.sleep in the cloud flow runner module is replaced with a mock;
    # presumably so that waiting on Queued start times costs no real time.
    mock_sleep = MagicMock()
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.time.sleep", mock_sleep)

    # Every attempt but the last reports Queued; the last one succeeds.
    run_states = [
        Queued(start_time=pendulum.now("UTC").add(seconds=i))
        for i in range(num_attempts - 1)
    ]
    run_states.append(Success())

    mock_run = MagicMock(side_effect=run_states)

    client.get_flow_run_info = MagicMock(
        side_effect=[MagicMock(version=i) for i in range(num_attempts)]
    )

    # Mock out the actual flow execution
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.FlowRunner.run", mock_run)

    @prefect.task
    def return_one():
        return 1

    with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
        # Calling the task inside the flow context is enough to register it;
        # the returned task object is not needed afterwards.
        return_one()

    # NOTE(review): an earlier comment here referred to "actual, not mocked"
    # sleep calls, but time.sleep is mocked above and no real sleep is made.
    final_state = CloudFlowRunner(flow=flow).run()
    assert final_state.is_successful()

    assert mock_run.call_count == num_attempts
    # Not called on the initial run attempt
    assert client.get_flow_run_info.call_count == num_attempts - 1
def test_scheduled_start_time_is_in_context(monkeypatch, executor):
    """Run a one-task flow and check the task result is a datetime.

    The flow contains only ``whats_the_time``. The test expects the flow run
    and the task run to be successful and the task's result to be an instance
    of ``datetime.datetime``.
    """
    flow_run_id, task_run_id_1 = (str(uuid.uuid4()) for _ in range(2))

    flow = prefect.Flow(name="test", tasks=[whats_the_time])

    time_task_run = TaskRun(
        id=task_run_id_1,
        task_id=whats_the_time.id,
        flow_run_id=flow_run_id,
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[time_task_run],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    assert state.is_successful()
    assert client.flow_runs[flow_run_id].state.is_successful()
    assert client.task_runs[task_run_id_1].state.is_successful()

    reported_time = state.result[whats_the_time].result
    assert isinstance(reported_time, datetime.datetime)
def test_flow_runner_heartbeat_sets_command(monkeypatch, setting_available):
    """Check that ``_heartbeat`` returns True and records the heartbeat command.

    The cloud ``Client`` is replaced by a mock whose GraphQL response exposes
    flow settings: ``{"heartbeat_enabled": True}`` when ``setting_available``
    is truthy, otherwise an empty dict. In both cases the test expects the
    same ``heartbeat_cmd`` for flow run id ``"foo"``.
    """
    client = MagicMock()
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    if setting_available:
        flow_settings = {"heartbeat_enabled": True}
    else:
        flow_settings = {}
    client.graphql.return_value.data.flow_run_by_pk.flow.settings = flow_settings

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    with prefect.context(flow_run_id="foo"):
        res = runner._heartbeat()

    expected_cmd = [sys.executable, "-m", "prefect"]
    expected_cmd += ["heartbeat", "flow-run", "-i", "foo"]

    assert res is True
    assert runner.heartbeat_cmd == expected_cmd
def test_simple_map(monkeypatch):
    """Run a flow with a single mapped task and inspect the recorded runs.

    ``plus_one`` is mapped over ``[0, 1, 2]``. The test expects a successful
    flow run, the pre-registered task run for the mapped task to end in a
    mapped state, and four task runs in total for that task's slug.
    """
    flow_run_id, task_run_id_1 = (str(uuid.uuid4()) for _ in range(2))

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([0, 1, 2])

    mapped_slug = flow.slugs[t1]

    # One explicitly identified run for the mapped task, plus a run with a
    # random id for every other task in the flow.
    registered_runs = [
        TaskRun(id=task_run_id_1, task_slug=mapped_slug, flow_run_id=flow_run_id)
    ]
    for task in flow.tasks:
        if task is t1:
            continue
        registered_runs.append(
            TaskRun(
                id=str(uuid.uuid4()),
                task_slug=flow.slugs[task],
                flow_run_id=flow_run_id,
            )
        )

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=registered_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=LocalExecutor())

    assert state.is_successful()
    assert client.flow_runs[flow_run_id].state.is_successful()
    assert client.task_runs[task_run_id_1].state.is_mapped()

    # there should be a total of 4 task runs corresponding to the mapped task
    runs_for_mapped_task = sum(
        1 for tr in client.task_runs.values() if tr.task_slug == mapped_slug
    )
    assert runs_for_mapped_task == 4
def test_task_runner_cls_is_cloud_task_runner():
    """A CloudFlowRunner built with defaults must use CloudTaskRunner."""
    empty_flow = prefect.Flow(name="test")
    runner = CloudFlowRunner(flow=empty_flow)
    assert runner.task_runner_cls is CloudTaskRunner
def test_deep_map_with_a_retry(monkeypatch):
    """
    Exercise a three-level mapped flow whose middle level fails once.

    After the first run the flow is expected to still be running, with the
    middle level's first child in a Retrying state and the last level's first
    child pending. Running the same flow run a second time should leave both
    of those children successful.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state
    on second run

    NOTE(review): this file contains another test with this exact name; within
    one module the later definition replaces the earlier one -- confirm which
    is meant to be collected.
    """
    flow_run_id = str(uuid.uuid4())
    parent_run_ids = [str(uuid.uuid4()) for _ in range(3)]
    task_run_id_1, task_run_id_2, task_run_id_3 = parent_run_ids

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    # Allow the middle level exactly one immediate retry.
    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    mapped_tasks = [t1, t2, t3]

    parent_runs = [
        TaskRun(id=run_id, task_id=task.id, flow_run_id=flow_run_id)
        for run_id, task in zip(parent_run_ids, mapped_tasks)
    ]
    other_runs = [
        TaskRun(id=t.id, task_id=t.id, flow_run_id=flow_run_id)
        for t in flow.tasks
        if t not in mapped_tasks
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=parent_runs + other_runs,
        monkeypatch=monkeypatch,
    )

    def first_child_run(task):
        # Task run recorded for ``task`` at map index 0.
        return next(
            tr
            for tr in client.task_runs.values()
            if tr.task_id == task.id and tr.map_index == 0
        )

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run()

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        run_count = sum(
            1 for tr in client.task_runs.values() if tr.task_id == task.id
        )
        assert run_count == 4

    # t2's first child task should be retrying
    assert isinstance(first_child_run(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child_run(t3).state.is_pending()

    # RUN A SECOND TIME
    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run()

    # t2's first child task should be successful
    assert first_child_run(t2).state.is_successful()

    # t3's first child task should be successful
    assert first_child_run(t3).state.is_successful()
def test_deep_map_with_a_retry(monkeypatch):
    """
    Exercise a three-level mapped flow whose middle level fails once, with a
    100-second retry delay.

    After the first run the flow is expected to still be running, with the
    middle level's first child Retrying and the last level's first child
    pending. The retrying child's start time is then overwritten with the
    current time before a second run, after which both children should be
    successful.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state
    on second run
    """
    flow_run_id = str(uuid.uuid4())
    parent_run_ids = [str(uuid.uuid4()) for _ in range(3)]
    task_run_id_1, task_run_id_2, task_run_id_3 = parent_run_ids

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    mapped_tasks = [t1, t2, t3]

    parent_runs = [
        TaskRun(id=run_id, task_slug=task.slug, flow_run_id=flow_run_id)
        for run_id, task in zip(parent_run_ids, mapped_tasks)
    ]
    other_runs = [
        TaskRun(id=str(uuid.uuid4()), task_slug=t.slug, flow_run_id=flow_run_id)
        for t in flow.tasks
        if t not in mapped_tasks
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=parent_runs + other_runs,
        monkeypatch=monkeypatch,
    )

    def first_child_run(task):
        # Task run recorded for ``task`` at map index 0.
        return next(
            tr
            for tr in client.task_runs.values()
            if tr.task_slug == task.slug and tr.map_index == 0
        )

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        run_count = sum(
            1 for tr in client.task_runs.values() if tr.task_slug == task.slug
        )
        assert run_count == 4

    # t2's first child task should be retrying
    assert isinstance(first_child_run(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child_run(t3).state.is_pending()

    # RUN A SECOND TIME with an artificially updated start time
    matching_ids = [
        t_id
        for t_id, tr in client.task_runs.items()
        if tr.task_slug == t2.slug and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    # t2's first child task should be successful
    assert first_child_run(t2).state.is_successful()

    # t3's first child task should be successful
    assert first_child_run(t3).state.is_successful()
def test_non_keyed_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Ensures that retries longer than 10 minutes properly "hydrate" upstream
    states so that mapped tasks retry correctly - for mapped tasks, even
    non-data dependencies can affect the number of children spawned.

    ``fail_once`` is mapped with ``return_list`` as a non-data upstream
    dependency. The first run should leave every mapped child Retrying; after
    the children's start times are reset and all in-memory result values are
    cleared, a second run should leave every task run successful without
    changing the number of runs for the mapped task.

    Args:
        monkeypatch: pytest fixture used to stub out ``requests`` and passed
            to ``MockedCloudClient``.
        tmpdir: pytest temporary directory used as the ``LocalResult`` location.
    """

    @prefect.task
    def return_list():
        return [1, 2, 3]

    @prefect.task(max_retries=1, retry_delay=datetime.timedelta(minutes=20))
    def fail_once():
        if prefect.context.get("task_run_count", 0) < 2:
            raise SyntaxError("bad")
        else:
            return 100

    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = fail_once.map(upstream_tasks=[return_list])

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[
            TaskRun(
                id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id
            ),
            TaskRun(
                id=task_run_id_2,
                task_slug=flow.slugs[return_list],
                flow_run_id=flow_run_id,
            ),
        ]
        + [
            TaskRun(
                id=str(uuid.uuid4()),
                task_slug=flow.slugs[t],
                flow_run_id=flow_run_id,
            )
            for t in flow.tasks
            if t not in [t1, return_list]
        ],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_successful()

    # there should be a total of 4 task runs corresponding to the mapped task
    assert (
        sum(1 for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1])
        == 4
    )

    # every child of t1 (map_index != -1 excludes the parent) should be retrying
    assert all(
        isinstance(tr.state, Retrying)
        for tr in client.task_runs.values()
        if tr.task_slug == flow.slugs[t1] and tr.map_index != -1
    )

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    for tr in client.task_runs.values():
        if tr.task_slug == flow.slugs[t1] and tr.map_index != -1:
            tr.state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    # the second run must not spawn additional runs for the mapped task
    assert (
        sum(1 for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1])
        == 4
    )
    assert all(tr.state.is_successful() for tr in client.task_runs.values())
def test_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Ensures that retries longer than 10 minutes properly "hydrate" upstream
    states so that mapped tasks retry correctly.

    A two-level mapped flow is run once, leaving the second level's first
    child Retrying. That child's start time is then reset to now, all
    in-memory result values are cleared, and the flow is run again; the child
    is expected to be successful afterwards.
    """
    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(minutes=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    mapped_tasks = [t1, t2]

    parent_runs = [
        TaskRun(id=run_id, task_slug=flow.slugs[task], flow_run_id=flow_run_id)
        for run_id, task in zip((task_run_id_1, task_run_id_2), mapped_tasks)
    ]
    other_runs = [
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[t], flow_run_id=flow_run_id
        )
        for t in flow.tasks
        if t not in mapped_tasks
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=parent_runs + other_runs,
        monkeypatch=monkeypatch,
    )

    def first_t2_child():
        # Task run recorded for t2 at map index 0.
        return next(
            tr
            for tr in client.task_runs.values()
            if tr.task_slug == flow.slugs[t2] and tr.map_index == 0
        )

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        run_count = sum(
            1
            for tr in client.task_runs.values()
            if tr.task_slug == flow.slugs[task]
        )
        assert run_count == 4

    # t2's first child task should be retrying
    assert isinstance(first_t2_child().state, Retrying)

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    matching_ids = [
        t_id
        for t_id, tr in client.task_runs.items()
        if tr.task_slug == flow.slugs[t2] and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    for task_run in client.task_runs.values():
        task_run.state._result.value = None

    with prefect.context(flow_run_id=flow_run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    # t2's first child task should be successful
    assert first_t2_child().state.is_successful()
def test_deep_map_with_a_failure(monkeypatch, executor):
    """Run a three-level mapped flow whose middle level fails, with no retries.

    The test expects the flow run to fail, all three parent task runs to be
    in a mapped state, four task runs to exist per mapped task, and the first
    child of both the failing level (t2) and the level downstream of it (t3)
    to be failed.

    Args:
        monkeypatch: pytest fixture passed to ``MockedCloudClient``.
        executor: executor fixture the flow is run with.
    """
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())
    task_run_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[
            TaskRun(
                id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id
            ),
            TaskRun(
                id=task_run_id_2, task_slug=flow.slugs[t2], flow_run_id=flow_run_id
            ),
            TaskRun(
                id=task_run_id_3, task_slug=flow.slugs[t3], flow_run_id=flow_run_id
            ),
        ]
        + [
            TaskRun(
                id=str(uuid.uuid4()), task_slug=flow.slugs[t], flow_run_id=flow_run_id
            )
            for t in flow.tasks
            if t not in [t1, t2, t3]
        ],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        # The executor fixture was previously requested but never used, so the
        # flow always ran with the runner's default executor. Pass it through,
        # as the other executor-based tests in this file do.
        # NOTE(review): confirm every executor supplied to this test works
        # with MockedCloudClient.
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=executor
        )

    assert state.is_failed()
    assert client.flow_runs[flow_run_id].state.is_failed()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for t in [t1, t2, t3]:
        assert (
            sum(
                1
                for tr in client.task_runs.values()
                if tr.task_slug == flow.slugs[t]
            )
            == 4
        )

    # t2's first child task should have failed
    t2_0 = next(
        tr
        for tr in client.task_runs.values()
        if tr.task_slug == flow.slugs[t2] and tr.map_index == 0
    )
    assert t2_0.state.is_failed()

    # t3's first child task should have failed
    t3_0 = next(
        tr
        for tr in client.task_runs.values()
        if tr.task_slug == flow.slugs[t3] and tr.map_index == 0
    )
    assert t3_0.state.is_failed()