def test_deploy_flow_run_sleeps_until_start_time(monkeypatch, cloud_api):
    """Check the sleep requested by `_deploy_flow_run` for a run scheduled 10s out.

    `time.sleep` and the agent's `Client` are both replaced with mocks, so the
    test only inspects the requested duration and never actually waits.
    """
    log_response = MagicMock(data=MagicMock(write_run_logs=MagicMock(success=True)))
    write_run_logs = MagicMock(return_value=log_response)
    mock_client = MagicMock()
    mock_client.return_value.write_run_logs = write_run_logs
    monkeypatch.setattr(
        "prefect.agent.agent.Client", MagicMock(return_value=mock_client)
    )

    mock_sleep = MagicMock()
    monkeypatch.setattr("time.sleep", mock_sleep)

    # Reference time is captured before the agent exists, as the bounds below
    # are relative to it
    reference_time = pendulum.now()

    agent = Agent()
    agent.deploy_flow = MagicMock()

    flow_state = Scheduled().serialize()
    scheduled_start = str(reference_time.add(seconds=10))
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "scheduled_start_time": scheduled_start,
            "version": 1,
            "task_runs": [task_run],
        }
    )
    agent._deploy_flow_run(flow_run=flow_run)

    # First positional argument of the most recent `time.sleep` call
    requested_sleep = mock_sleep.call_args[0][0]
    assert 9 < requested_sleep <= 10
    agent.deploy_flow.assert_called_once()
def test_mark_flow_as_submitted(monkeypatch, cloud_api, with_task_runs):
    """Check the client calls made by `_mark_flow_as_submitted`.

    The flow run must always be set to `Submitted`; the task run state is only
    expected to be set when the flow run payload carries task runs.
    """
    agent = Agent()
    agent.client = MagicMock()

    flow_state = Scheduled().serialize()
    task_runs = []
    if with_task_runs:
        task_runs.append(
            GraphQLResult(
                {
                    "id": "task-id",
                    "version": 1,
                    "serialized_state": Scheduled().serialize(),
                }
            )
        )

    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": task_runs,
        }
    )
    agent._mark_flow_as_submitted(flow_run=flow_run)

    expected_state = Submitted(message="Submitted for execution")
    agent.client.set_flow_run_state.assert_called_once_with(
        flow_run_id="id", version=1, state=expected_state
    )

    set_task_run_state = agent.client.set_task_run_state
    if not with_task_runs:
        set_task_run_state.assert_not_called()
        return

    set_task_run_state.assert_called_once_with(
        task_run_id="task-id",
        version=1,
        state=expected_state,
    )
class TestRunModels:
    """Tests for the `fields_from_state` helpers on the run-state models."""

    @staticmethod
    def _check_fields_from_state(model, state):
        """Call `model.fields_from_state(state)` and verify each returned field."""
        # Captured first so the generated timestamp can be compared against it
        before = pendulum.now()
        fields = model.fields_from_state(state)

        assert fields["state"] == type(state).__name__
        assert fields["timestamp"] > before
        assert fields["message"] == state.message
        assert fields["result"] == state.result
        assert fields["serialized_state"] == state.serialize()

    @pytest.mark.parametrize(
        "state",
        [
            Running(message="running", result=1),
            Scheduled(message="scheduled", result=1, start_time=pendulum.now()),
        ],
    )
    async def test_flow_run_fields_from_state(self, state):
        """`FlowRunState.fields_from_state` mirrors the given state's attributes."""
        self._check_fields_from_state(models.FlowRunState, state)

    @pytest.mark.parametrize(
        "state",
        [
            Running(message="running", result=1),
            Scheduled(message="scheduled", result=1, start_time=pendulum.now()),
        ],
    )
    async def test_task_run_fields_from_state(self, state):
        """`TaskRunState.fields_from_state` mirrors the given state's attributes."""
        self._check_fields_from_state(models.TaskRunState, state)
def test_agent_logs_flow_run_exceptions(monkeypatch, runner_token, caplog, cloud_api):
    """Check that a failure in `deploy_flow` is written to the run logs.

    `deploy_flow` is mocked to raise; the test then verifies both the
    `write_run_logs` payload and the message captured by `caplog`.
    """
    log_response = MagicMock(data=MagicMock(write_run_logs=MagicMock(success=True)))
    write_run_logs = MagicMock(return_value=log_response)
    mock_client = MagicMock()
    mock_client.return_value.write_run_logs = write_run_logs
    monkeypatch.setattr(
        "prefect.agent.agent.Client", MagicMock(return_value=mock_client)
    )

    agent = Agent()
    agent.deploy_flow = MagicMock(side_effect=Exception("Error Here"))

    flow_state = Scheduled().serialize()
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": [task_run],
        }
    )
    agent.deploy_and_update_flow_run(flow_run=flow_run)

    expected_log = {
        "flow_run_id": "id",
        "level": "ERROR",
        "message": "Error Here",
        "name": "agent",
    }
    assert mock_client.write_run_logs.called
    mock_client.write_run_logs.assert_called_with([expected_log])
    assert "Logging platform error for flow run" in caplog.text
def test_get_flow_run_scheduled_start_time_from_state_time(cloud_mocks):
    """Check which start time `_get_flow_run_scheduled_start_time` returns.

    The mocked GraphQL payload has a flow-run-level `scheduled_start_time` and
    three states. The expected result is the start time of the state with the
    most recent `created` value.
    """
    reference_time = pendulum.now("utc")

    older_state = Scheduled(start_time=reference_time.add(seconds=10)).serialize()
    newest_state = Scheduled(start_time=reference_time).serialize()
    malformed_state = Scheduled().serialize()

    # Database "created" stamps: `newest_state` is the most recently created
    older_state["created"] = pendulum.now().subtract(seconds=10).isoformat()
    newest_state["created"] = pendulum.now().isoformat()

    # Malformed entry: start time blanked out and no "created" key at all
    malformed_state["start_time"] = None

    flow_run_row = {
        "scheduled_start_time": reference_time.subtract(seconds=10).isoformat(),
        "states": [older_state, newest_state, malformed_state],
    }
    cloud_mocks.Client().graphql.return_value = GraphQLResult(
        {"data": {"flow_run": [flow_run_row]}}
    )

    assert _get_flow_run_scheduled_start_time("flow-run-id") == reference_time
def test_update_states_passes_task_runs(monkeypatch, runner_token):
    """`update_states` returns a falsy value for a flow run that has task runs."""
    graphql_response = MagicMock(
        data=MagicMock(set_flow_run_state=None, set_task_run_state=None)
    )
    mock_client_cls = MagicMock()
    mock_client_cls.return_value.graphql = MagicMock(return_value=graphql_response)
    monkeypatch.setattr("prefect.agent.agent.Client", mock_client_cls)

    agent = Agent()

    flow_state = Scheduled().serialize()
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": [task_run],
        }
    )

    outcome = agent.update_states(flow_runs=[flow_run])
    assert not outcome
def test_agent_process(monkeypatch, runner_token):
    """`agent_process` returns a truthy value when the queue holds one flow run."""
    flow_state = Scheduled().serialize()
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    queued_flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": [task_run],
        }
    )

    # One canned response is served for every GraphQL call the agent makes
    graphql_response = MagicMock(
        data=MagicMock(
            set_flow_run_state=None,
            set_task_run_state=None,
            getRunsInQueue=MagicMock(flow_run_ids=["id"]),
            flow_run=[queued_flow_run],
        )
    )
    mock_client_cls = MagicMock()
    mock_client_cls.return_value.graphql = MagicMock(return_value=graphql_response)
    monkeypatch.setattr("prefect.agent.agent.Client", mock_client_cls)

    agent = Agent()
    processed = agent.agent_process("id")
    assert processed
def test_agent_logs_flow_run_exceptions(monkeypatch, runner_token):
    """`_log_flow_run_exceptions` writes one ERROR log entry for the flow run."""
    log_response = MagicMock(data=MagicMock(writeRunLogs=MagicMock(success=True)))
    write_run_logs = MagicMock(return_value=log_response)
    mock_client = MagicMock()
    mock_client.return_value.write_run_logs = write_run_logs
    monkeypatch.setattr(
        "prefect.agent.agent.Client", MagicMock(return_value=mock_client)
    )

    agent = Agent()

    flow_state = Scheduled().serialize()
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": [task_run],
        }
    )
    agent._log_flow_run_exceptions(
        flow_runs=[flow_run],
        exc=ValueError("Error Here"),
    )

    expected_log = {
        "flowRunId": "id",
        "level": "ERROR",
        "message": "Error Here",
        "name": "agent",
    }
    assert mock_client.write_run_logs.called
    mock_client.write_run_logs.assert_called_with([expected_log])
def test_deploy_flow_run_logs_flow_run_exceptions(monkeypatch, caplog, cloud_api):
    """Check that `_deploy_flow_run` logs an exception raised by `deploy_flow`.

    Both the `write_run_logs` payload and the message captured by `caplog` are
    verified.
    """
    log_response = MagicMock(data=MagicMock(write_run_logs=MagicMock(success=True)))
    write_run_logs = MagicMock(return_value=log_response)
    mock_client = MagicMock()
    mock_client.return_value.write_run_logs = write_run_logs
    monkeypatch.setattr(
        "prefect.agent.agent.Client", MagicMock(return_value=mock_client)
    )

    agent = Agent()
    agent.deploy_flow = MagicMock(side_effect=Exception("Error Here"))

    flow_state = Scheduled().serialize()
    # Start time is "now", so there is no future start time to wait for
    scheduled_start = str(pendulum.now())
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "scheduled_start_time": scheduled_start,
            "version": 1,
            "task_runs": [task_run],
        }
    )
    agent._deploy_flow_run(flow_run=flow_run)

    expected_log = {
        "flow_run_id": "id",
        "level": "ERROR",
        "message": "Error Here",
        "name": "agent",
    }
    assert mock_client.write_run_logs.called
    mock_client.write_run_logs.assert_called_with([expected_log])
    assert "Exception encountered while deploying flow run id" in caplog.text
def test_agent_process(monkeypatch, runner_token, cloud_api):
    """Check that `agent_process` submits work to the executor it is given.

    The executor is a mock; the test verifies that `submit` was called and that
    a done-callback was attached to the future it returned.
    """
    flow_state = Scheduled().serialize()
    task_run = GraphQLResult(
        {
            "id": "id",
            "version": 1,
            "serialized_state": Scheduled().serialize(),
        }
    )
    queued_flow_run = GraphQLResult(
        {
            "id": "id",
            "serialized_state": flow_state,
            "version": 1,
            "task_runs": [task_run],
        }
    )

    # One canned response is served for every GraphQL call the agent makes
    graphql_response = MagicMock(
        data=MagicMock(
            set_flow_run_state=None,
            set_task_run_state=None,
            get_runs_in_queue=MagicMock(flow_run_ids=["id"]),
            flow_run=[queued_flow_run],
        )
    )
    mock_client_cls = MagicMock()
    mock_client_cls.return_value.graphql = MagicMock(return_value=graphql_response)
    monkeypatch.setattr("prefect.agent.agent.Client", mock_client_cls)

    mock_future = MagicMock()
    mock_executor = MagicMock()
    mock_executor.submit = MagicMock(return_value=mock_future)

    agent = Agent()
    assert agent.agent_process(mock_executor)
    assert mock_executor.submit.called
    assert mock_future.add_done_callback.called
async def test_get_flow_run_in_queue_filters_labels_on_task_runs_correctly(
    self,
    flow_run_id,
    labeled_flow_run_id,
    labeled_task_run_id,
    task_run_id,
):
    """Query the queue with three label sets after scheduling both task runs.

    Only the label set `["foo", "bar", "chris"]` is expected to return the
    labeled flow run; the unlabeled flow run is never expected.
    """
    # Schedule both task runs one day in the past
    for run_id in (labeled_task_run_id, task_run_id):
        await states.set_task_run_state(
            task_run_id=run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

    superset_result = await runs.get_runs_in_queue(labels=["foo", "bar", "chris"])
    unrelated_result = await runs.get_runs_in_queue(labels=["dev"])
    partial_result = await runs.get_runs_in_queue(labels=["foo", "staging"])

    assert labeled_flow_run_id in superset_result
    assert flow_run_id not in superset_result

    assert labeled_flow_run_id not in unrelated_result
    assert flow_run_id not in unrelated_result

    assert labeled_flow_run_id not in partial_result
    assert flow_run_id not in partial_result
async def test_start_with_one_root_then_other_succeeds(self, tenant_id, flow, agent):
    """Run a flow in two passes, scheduling one root task per pass.

    After the first pass (only `numbers1` scheduled) the flow run is expected to
    still be `Running` with the other tasks pending. After the second pass
    (`numbers2` scheduled) it is expected to reach `Success` with the mapped
    `add` task and its three children finished.
    """
    flow_run_id = await api.runs.create_flow_run(flow_id=flow.server_id)
    await api.states.set_flow_run_state(flow_run_id, state=Running())

    # ------------------------------------------------------------------
    # Pass 1: schedule only the `numbers1` root task
    numbers1_run_id = await api.runs.get_or_create_task_run(
        flow_run_id=flow_run_id, task_id=flow.numbers1.id
    )
    await api.states.set_task_run_state(
        task_run_id=numbers1_run_id,
        state=Scheduled(),
    )
    await agent.run_scheduled(flow_id=flow.server_id)
    # Give the states time to be persisted to the database
    await asyncio.sleep(1.0)

    flow_run, states_by_key = await await_flow_run_state(flow_run_id, "Running", 1)
    assert flow_run.serialized_state["type"] == "Running"
    assert len(states_by_key) == 3

    # `numbers1` finished; `numbers2` and the `add` parent are still waiting
    assert states_by_key[(flow.numbers1.slug, -1)].is_successful()
    assert states_by_key[(flow.numbers2.slug, -1)].is_pending()
    assert states_by_key[(flow.add.slug, -1)].is_pending()

    # ------------------------------------------------------------------
    # Pass 2: schedule the `numbers2` root task
    numbers2_run_id = await api.runs.get_or_create_task_run(
        flow_run_id=flow_run_id, task_id=flow.numbers2.id
    )
    await api.states.set_task_run_state(
        task_run_id=numbers2_run_id,
        state=Scheduled(),
    )
    await agent.run_scheduled(flow_id=flow.server_id)
    # Give the states time to be persisted to the database
    await asyncio.sleep(1.0)

    flow_run, states_by_key = await await_flow_run_state(flow_run_id, "Success")
    assert flow_run.serialized_state["type"] == "Success"
    assert len(states_by_key) == 6

    # Both roots finished and the `add` parent is mapped
    assert states_by_key[(flow.numbers1.slug, -1)].is_successful()
    assert states_by_key[(flow.numbers2.slug, -1)].is_successful()
    assert states_by_key[(flow.add.slug, -1)].is_mapped()

    # Each of the three mapped `add` children finished
    for map_index in range(3):
        assert states_by_key[(flow.add.slug, map_index)].is_successful()
def test_scheduled_states_with_future_start_time(self):
    """A start time ten minutes ahead raises `ENDRUN` carrying the same state."""
    ten_minutes_ahead = pendulum.now("utc") + datetime.timedelta(minutes=10)
    state = Scheduled(start_time=ten_minutes_ahead)
    runner = FlowRunner(flow=Flow(name="test"))

    with pytest.raises(ENDRUN) as exc:
        runner.check_flow_reached_start_time(state=state)

    # The raised signal must carry the very same state object
    assert exc.value.state is state
def _from_task_run_data(cls, task_run: dict) -> "TaskRunView":
    """
    Build a `TaskRunView` out of serialized task run data

    The `id`, `task` and `serialized_state` entries are consumed here; every
    other entry is forwarded to the constructor as a keyword argument.

    Args:
        - task_run: The serialized task run data; the caller's dict is left
            untouched because a copy is consumed instead

    Returns:
        A populated `TaskRunView` instance
    """
    # Work on a copy so the pops below never mutate the caller's data
    remaining = task_run.copy()

    run_id = remaining.pop("id")
    task = remaining.pop("task")

    # A falsy serialized state (e.g. null, which the backend may return before
    # it has created the state -- typically for mapped tasks) is replaced with
    # a fresh `Scheduled` state
    serialized_state = remaining.pop("serialized_state")
    if not serialized_state:
        serialized_state = Scheduled().serialize()

    state = State.deserialize(serialized_state)
    return cls(
        task_run_id=run_id,
        state=state,
        task_id=task["id"],
        task_slug=task["slug"],
        **remaining,
    )
async def test_future_flow_runs_are_not_retrieved(self, flow_run_id, tenant_id):
    """A flow run scheduled one day ahead leaves the tenant's queue empty."""
    tomorrow = pendulum.now("utc").add(days=1)
    await api.states.set_flow_run_state(
        flow_run_id=flow_run_id,
        state=Scheduled(start_time=tomorrow),
    )

    queued = await api.runs.get_runs_in_queue(tenant_id=tenant_id)
    assert not queued
async def test_get_flow_run_in_queue_uses_labels_on_task_runs(
    self,
    flow_run_id,
    labeled_flow_run_id,
    labeled_task_run_id,
    task_run_id,
):
    """With both task runs scheduled, only the labeled flow run is returned."""
    # Schedule both task runs one day in the past
    for run_id in (labeled_task_run_id, task_run_id):
        await states.set_task_run_state(
            task_run_id=run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

    queued = await runs.get_runs_in_queue(labels=["foo", "bar"])

    assert labeled_flow_run_id in queued
    assert flow_run_id not in queued
def test_scheduled_states_without_start_time(self):
    """A `Scheduled` state built with `start_time=None` is returned unchanged."""
    state = Scheduled(start_time=None)
    runner = FlowRunner(flow=Flow(name="test"))

    returned = runner.check_flow_reached_start_time(state=state)
    assert returned is state
async def test_get_flow_run_in_queue_uses_labels(
    self, tenant_id, flow_run_id, labeled_flow_run_id
):
    """With both flow runs scheduled, a labeled query returns only the labeled one."""
    # Schedule both flow runs one day in the past
    for run_id in (flow_run_id, labeled_flow_run_id):
        await api.states.set_flow_run_state(
            flow_run_id=run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

    queued = await api.runs.get_runs_in_queue(
        tenant_id=tenant_id, labels=["foo", "bar"]
    )

    assert labeled_flow_run_id in queued
    assert flow_run_id not in queued
async def test_get_flow_run_in_queue(self, flow_run_id, tenant_id):
    """A flow run scheduled one day in the past appears in the tenant's queue."""
    yesterday = pendulum.now("utc").subtract(days=1)
    await api.states.set_flow_run_state(
        flow_run_id=flow_run_id,
        state=Scheduled(start_time=yesterday),
    )

    queued = await api.runs.get_runs_in_queue(tenant_id=tenant_id)
    assert flow_run_id in queued
def test_scheduled_states_with_past_start_time(self):
    """A start time one minute in the past lets the state pass through unchanged."""
    one_minute_ago = pendulum.now("utc") - datetime.timedelta(minutes=1)
    state = Scheduled(start_time=one_minute_ago)
    runner = FlowRunner(flow=Flow(name="test"))

    returned = runner.check_flow_reached_start_time(state=state)
    assert returned is state
def test_mapped_task_can_be_scheduled(self, executor):
    """A mapped task whose scheduled start time has passed runs to success."""
    with Flow(name="test") as flow:
        mapped = ReturnTask().map([0, 0])

    one_minute_ago = pendulum.now().subtract(minutes=1)
    runner = FlowRunner(flow=flow)
    final_state = runner.run(
        return_tasks=[mapped],
        executor=executor,
        task_states={mapped: Scheduled(start_time=one_minute_ago)},
    )

    assert final_state.is_successful()
async def test_future_flow_runs_are_not_retrieved(
    self,
    flow_run_id,
):
    """A flow run scheduled one day ahead leaves the queue empty."""
    # Delete the flow runs matched by the `_neq` filter (presumably every run
    # other than `flow_run_id`) so they cannot appear in the queue
    other_runs = models.FlowRun.where({"id": {"_neq": flow_run_id}})
    await other_runs.delete()

    tomorrow = pendulum.now("utc").add(days=1)
    await states.set_flow_run_state(
        flow_run_id=flow_run_id,
        state=Scheduled(start_time=tomorrow),
    )

    queued = await runs.get_runs_in_queue()
    assert not queued
def test_mapped_task_can_be_scheduled_for_future(self, executor):
    """A mapped task scheduled an hour ahead leaves the flow run `Running`.

    The task's entry in the flow state's result is expected to still be a
    `Scheduled` state.
    """
    with Flow(name="test") as flow:
        mapped = ReturnTask().map([0, 0])

    in_one_hour = pendulum.now().add(hours=1)
    runner = FlowRunner(flow=flow)
    final_state = runner.run(
        return_tasks=[mapped],
        executor=executor,
        task_states={mapped: Scheduled(start_time=in_one_hour)},
    )

    assert final_state.is_running()
    assert isinstance(final_state.result[mapped], Scheduled)
class TestCheckFlowPendingOrRunning:
    """Tests for `FlowRunner.check_flow_is_pending_or_running`."""

    @pytest.mark.parametrize(
        "state", [Pending(), Running(), Retrying(), Scheduled()]
    )
    def test_pending_or_running_are_ok(self, state):
        """Each parametrized state is handed back as the same object."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        returned = runner.check_flow_is_pending_or_running(state=state)
        assert returned is state

    @pytest.mark.parametrize(
        "state", [Finished(), Success(), Failed(), Skipped()]
    )
    def test_not_pending_or_running_raise_endrun(self, state):
        """Each parametrized state makes the check raise `ENDRUN`."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        with pytest.raises(ENDRUN):
            runner.check_flow_is_pending_or_running(state=state)
def test_creates_subprocess_correctly(self, cloud_mocks, mocks, include_local_env):
    """Check the command and environment passed to `subprocess.run`.

    The expected environment is the generated flow-run variables, layered on
    top of a copy of `os.environ` when `include_local_env` is truthy.
    """
    # The flow run starts out scheduled ...
    cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
    # ... and is reported as finished once the first iteration is over
    cloud_mocks.FlowRunView().get_latest().state = Success()

    execute_flow_run_in_subprocess(
        "flow-run-id", include_local_env=include_local_env
    )

    # The wait helper must have been given the right flow run id
    mocks.wait_for_flow_run_start_time.assert_called_once_with("flow-run-id")

    # Start from the local environment (if requested) and layer the variables
    # generated for the flow run on top of it
    if include_local_env:
        expected_env = os.environ.copy()
    else:
        expected_env = {}
    expected_env.update(
        {
            "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS": "True",
            "PREFECT__LOGGING__LEVEL": "INFO",
            "PREFECT__LOGGING__FORMAT": "[%(asctime)s] %(levelname)s - %(name)s | %(message)s",
            "PREFECT__LOGGING__DATEFMT": "%Y-%m-%d %H:%M:%S%z",
            "PREFECT__BACKEND": "cloud",
            "PREFECT__CLOUD__API": "https://api.prefect.io",
            "PREFECT__CLOUD__TENANT_ID": "",
            "PREFECT__CLOUD__API_KEY": cloud_mocks.Client().api_key,
            "PREFECT__CONTEXT__FLOW_RUN_ID": "flow-run-id",
            "PREFECT__CONTEXT__FLOW_ID": cloud_mocks.FlowRunView.from_flow_run_id().flow_id,
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
    )

    # The subprocess gets the expected command line and environment
    expected_command = [sys.executable, "-m", "prefect", "execute", "flow-run"]
    mocks.subprocess.run.assert_called_once_with(
        expected_command,
        env=expected_env,
    )

    # The return code of the subprocess is checked
    mocks.subprocess.run().check_returncode.assert_called_once()
async def test_get_runs_in_queue(
    self,
    run_query,
    flow_run_id,
):
    """The `self.mutation` query returns a flow run scheduled a day in the past."""
    yesterday = pendulum.now("utc").subtract(days=1)
    await api.states.set_flow_run_state(
        flow_run_id=flow_run_id,
        state=Scheduled(start_time=yesterday),
    )

    response = await run_query(query=self.mutation, variables=dict(input=dict()))
    queued_ids = response.data.get_runs_in_queue.flow_run_ids
    assert flow_run_id in queued_ids
def test_loops_until_flow_run_is_finished(self, cloud_mocks, mocks):
    """Check the call counts when `get_latest` yields Running, Running, Success.

    Both the subprocess and the start-time wait are expected to be called
    exactly twice.
    """
    get_view = cloud_mocks.FlowRunView.from_flow_run_id
    get_view().state = Scheduled()
    # Successive `get_latest()` calls report Running, Running, then Success
    get_view().get_latest.side_effect = [
        MagicMock(state=latest_state)
        for latest_state in (Running(), Running(), Success())
    ]

    execute_flow_run_in_subprocess("flow-run-id")

    # Two subprocess executions ...
    assert mocks.subprocess.run.call_count == 2
    # ... each one preceded by a wait for the start time
    assert mocks.wait_for_flow_run_start_time.call_count == 2
def test_handles_bad_subprocess_result(self, cloud_mocks, mocks):
    """A failing return-code check is surfaced as a `RuntimeError`.

    The subprocess is expected to run once and the flow run must not be marked
    as failed here.
    """
    cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()

    process_failure = CalledProcessError(cmd="foo", returncode=1)
    completed_process = mocks.subprocess.run.return_value
    completed_process.check_returncode.side_effect = process_failure

    # The `CalledProcessError` is expected to come back as a `RuntimeError`
    with pytest.raises(RuntimeError, match="flow run process failed"):
        execute_flow_run_in_subprocess("flow-run-id")

    # Exactly one attempt at running the subprocess
    mocks.subprocess.run.assert_called_once()

    # Failing the flow run is not done here -- that is left to the FlowRunner
    mocks.fail_flow_run.assert_not_called()
def test_handles_signal_interrupt(self, cloud_mocks, mocks):
    """A `KeyboardInterrupt` from the subprocess propagates and fails the run."""
    cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
    mocks.subprocess.run.side_effect = KeyboardInterrupt()

    # The interrupt must propagate to the caller
    with pytest.raises(KeyboardInterrupt):
        execute_flow_run_in_subprocess("flow-run-id")

    # Exactly one attempt at running the subprocess
    mocks.subprocess.run.assert_called_once()

    # The flow run is marked as failed with the interrupt message
    mocks.fail_flow_run.assert_called_once_with(
        flow_run_id="flow-run-id",
        message="Flow run received an interrupt signal.",
    )
async def test_getting_a_flow_run_from_queue_doesnt_dequeue_it(
    self, flow_run_id, tenant_id
):
    """Querying the queue three times returns the flow run every time."""
    yesterday = pendulum.now("utc").subtract(days=1)
    await api.states.set_flow_run_state(
        flow_run_id=flow_run_id,
        state=Scheduled(start_time=yesterday),
    )

    # Query the queue three times in a row
    snapshots = [
        await api.runs.get_runs_in_queue(tenant_id=tenant_id) for _ in range(3)
    ]

    for queued in snapshots:
        assert flow_run_id in queued