def test_advance(self, external_brains_mock, step_mock):
    """Check that ``advance`` forwards experiences and picks up queued policies.

    :param external_brains_mock: Injected mock; its ``return_value`` is set to
        the list of brain names used by this test.
    :param step_mock: Injected mock; its ``return_value`` is set to the list
        of steps produced for the test. Presumably this patches
        ``SubprocessEnvManager._step`` (the test asserts on
        ``env_manager._step``) -- confirm against the decorators.
    """
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}

    def fake_create_worker(em, worker_id, step_queue, env_factory, engine_c):
        # Stand-in worker factory; ``em`` receives the manager instance
        # because the stub is installed as a plain function on the class.
        return MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )

    # Install the stub through a patch context instead of assigning to the
    # class directly: a bare assignment is never undone and would leak the
    # stub into every test that runs afterwards.
    with mock.patch.object(
        SubprocessEnvManager, "create_worker", fake_create_worker
    ):
        env_manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 3
        )
        external_brains_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: Mock()}
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
        step_mock.return_value = [step_info]
        env_manager.advance()

        # Test add_experiences
        env_manager._step.assert_called_once()
        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
        env_manager.advance()
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
def test_advance(self, mock_create_worker, training_behaviors_mock, step_mock):
    """Run one ``advance`` and verify experience forwarding and policy pickup.

    :param mock_create_worker: Injected mock for worker creation; its
        ``side_effect`` is set to ``create_worker_mock``.
    :param training_behaviors_mock: Injected mock; its ``return_value`` is set
        to the list of behavior names used by this test.
    :param step_mock: Injected mock; its ``return_value`` is set to the list
        of steps produced for the test.
    """
    behavior = "testbrain"

    # Workers must be stubbed before the manager is constructed.
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    training_behaviors_mock.return_value = [behavior]

    # Agent manager whose policy queue yields the policy twice, then
    # yields an ``AgentManagerQueue.Empty`` instance.
    queued_policy = mock.Mock()
    agent_manager = mock.Mock()
    agent_manager.policy_queue.get_nowait.side_effect = [
        queued_policy,
        queued_policy,
        AgentManagerQueue.Empty(),
    ]
    manager.set_agent_manager(behavior, agent_manager)

    # Single environment step carrying a pair of step results plus stats.
    stats = {
        "averaged": (1.0, StatsAggregationMethod.AVERAGE),
        "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
    }
    env_step = EnvironmentStep(
        {behavior: (Mock(), Mock())},
        0,
        {behavior: MagicMock()},
        stats,
    )
    step_mock.return_value = [env_step]

    manager.advance()

    # Test add_experiences
    manager._step.assert_called_once()
    first_result, second_result = env_step.current_all_step_result[behavior]
    agent_manager.add_experiences.assert_called_once_with(
        first_result,
        second_result,
        0,
        env_step.brain_name_to_action_info[behavior],
    )

    # Test policy queue
    assert manager.policies[behavior] == queued_policy
    assert agent_manager.policy == queued_policy