def test_environments_are_created(self, mock_create_worker):
    """Constructing the manager should spawn one worker per requested environment."""
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 2
    )  # Creates two processes
    # Each worker id gets its own create_worker call, wired to the shared step queue.
    manager.create_worker.assert_has_calls(
        [
            mock.call(
                0, manager.step_queue, mock_env_factory, EngineConfig.default_config()
            ),
            mock.call(
                1, manager.step_queue, mock_env_factory, EngineConfig.default_config()
            ),
        ]
    )
    self.assertEqual(len(manager.env_workers), 2)
def test_engine_configuration(): sender = EngineConfigurationChannel() # We use a raw bytes channel to interpred the data receiver = RawBytesChannel(sender.channel_id) config = EngineConfig.default_config() sender.set_configuration(config) data = SideChannelManager([sender]).generate_side_channel_messages() SideChannelManager([receiver]).process_side_channel_message(data) received_data = receiver.get_and_clear_received_messages() assert len(received_data) == 5 # 5 different messages one for each setting sent_time_scale = 4.5 sender.set_configuration_parameters(time_scale=sent_time_scale) data = SideChannelManager([sender]).generate_side_channel_messages() SideChannelManager([receiver]).process_side_channel_message(data) message = IncomingMessage(receiver.get_and_clear_received_messages()[0]) message.read_int32() time_scale = message.read_float32() assert time_scale == sent_time_scale with pytest.raises(UnitySideChannelException): sender.set_configuration_parameters(width=None, height=42) with pytest.raises(UnityCommunicationException): # try to send data to the EngineConfigurationChannel sender.set_configuration_parameters(time_scale=sent_time_scale) data = SideChannelManager([sender]).generate_side_channel_messages() SideChannelManager([sender]).process_side_channel_message(data)
def test_step_takes_steps_for_all_non_waiting_envs(self, mock_create_worker):
    """_step should dispatch STEP commands only to workers that are not waiting."""
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 3)
    manager.step_queue = Mock()
    # Two workers report results back, after which the queue runs dry.
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
        EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    for worker, last_step in zip(manager.env_workers, last_steps):
        worker.previous_step = last_step
    # Worker 2 is still waiting on a response, so it must be skipped.
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager._step()
    for i, worker in enumerate(manager.env_workers):
        if i < 2:
            worker.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result, i
            )
    # Only the two non-waiting workers contribute to the returned steps.
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def test_advance(self, external_brains_mock, step_mock):
    """advance() should feed experiences to the AgentManager and pick up new policies."""
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    # Substitute lightweight mock workers for real subprocesses.
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
    )
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    external_brains_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    env_manager.set_agent_manager(brain_name, agent_manager_mock)
    step_info_dict = {brain_name: Mock()}
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
    step_mock.return_value = [step_info]
    env_manager.advance()
    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )
    # Test policy queue
    mock_policy = mock.Mock()
    agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
    env_manager.advance()
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy
def test_step_takes_steps_for_all_non_waiting_envs(self):
    """step() should dispatch "step" commands only to workers that are not waiting."""
    # Substitute lightweight mock workers for real subprocesses.
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 3)
    manager.step_queue = Mock()
    # Two workers report results back, after which the queue runs dry.
    manager.step_queue.get_nowait.side_effect = [
        EnvironmentResponse("step", 0, StepResponse(0, None)),
        EnvironmentResponse("step", 1, StepResponse(1, None)),
        EmptyQueue(),
    ]
    step_mock = Mock()
    last_steps = [Mock(), Mock(), Mock()]
    for worker, last_step in zip(manager.env_workers, last_steps):
        worker.previous_step = last_step
    # Worker 2 is still waiting on a response, so it must be skipped.
    manager.env_workers[2].waiting = True
    manager._take_step = Mock(return_value=step_mock)
    res = manager.step()
    for i, worker in enumerate(manager.env_workers):
        if i < 2:
            worker.send.assert_called_with("step", step_mock)
            manager.step_queue.get_nowait.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i
            )
    # Only the two non-waiting workers contribute to the returned steps.
    assert res == [
        manager.env_workers[0].previous_step,
        manager.env_workers[1].previous_step,
    ]
def test_reset_passes_reset_params(self, mock_create_worker):
    """_reset_env must forward the custom reset parameters to the worker."""
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 1)
    params = {"test": "params"}
    manager._reset_env(params)
    # The refactored worker protocol (used by the other _reset_env test in this
    # file) sends EnvironmentCommand enum members, not the legacy raw string
    # "reset"; asserting the string here could never match.
    manager.env_workers[0].send.assert_called_with(EnvironmentCommand.RESET, params)
def test_reset_passes_reset_params(self):
    """reset() must forward the custom reset parameters to the worker."""
    # Substitute a lightweight mock worker for the real subprocess.
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 1)
    params = {"test": "params"}
    manager.reset(params)
    manager.env_workers[0].send.assert_called_with("reset", params)
def _make_unity_env(
    env_path: Optional[str] = None,
    port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
    seed: int = -1,
    env_args: Optional[List[str]] = None,
    engine_config: Optional[EngineConfig] = None,
    side_channels: Optional[List[SideChannel]] = None,
) -> UnityEnvironment:
    """
    Create a UnityEnvironment.
    """
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        validated_path = UnityEnvironment.validate_environment_path(env_path)
        if validated_path is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)
    # Append to the caller's channel list when one was given, otherwise start one.
    if side_channels is None:
        side_channels = [engine_configuration_channel]
    else:
        side_channels.append(engine_configuration_channel)

    # Find an available port to connect to Unity environment.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=side_channels,
            )
        except UnityWorkerInUseException:
            # Port collision: move to the next port and retry.
            logger.debug(f"port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
def test_subprocess_env_raises_errors(num_envs):
    """A factory that raises in the subprocess should surface the exception on reset()."""

    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response. We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.1)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(
        failing_env_factory, EngineConfig.default_config(), num_envs
    )
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
def test_reset_collects_results_from_all_envs(self, mock_create_worker):
    """_reset_env should send RESET to every worker and gather all previous steps."""
    mock_create_worker.side_effect = create_worker_mock
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 4)
    params = {"test": "params"}
    res = manager._reset_env(params)
    for i, worker in enumerate(manager.env_workers):
        worker.send.assert_called_with(EnvironmentCommand.RESET, params)
        worker.recv.assert_called()
        # Check that the "last steps" are set to the value returned for each step
        self.assertEqual(
            manager.env_workers[i].previous_step.current_all_step_result, i
        )
    # The reset result is the collection of every worker's previous step.
    assert res == [ew.previous_step for ew in manager.env_workers]
def test_reset_collects_results_from_all_envs(self):
    """reset() should send "reset" to every worker and gather all previous steps."""
    # Substitute lightweight mock workers for real subprocesses.
    SubprocessEnvManager.create_worker = (
        lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )
    )
    manager = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 4)
    params = {"test": "params"}
    res = manager.reset(params)
    for i, worker in enumerate(manager.env_workers):
        worker.send.assert_called_with("reset", params)
        worker.recv.assert_called()
        # Check that the "last steps" are set to the value returned for each step
        self.assertEqual(
            manager.env_workers[i].previous_step.current_all_brain_info, i
        )
    # The reset result is the collection of every worker's previous step.
    assert res == [ew.previous_step for ew in manager.env_workers]
def test_subprocess_env_endtoend(num_envs):
    """End-to-end: PPO should train to success through the subprocess manager."""
    env_manager = SubprocessEnvManager(
        simple_env_factory, EngineConfig.default_config(), num_envs
    )
    trainer_config = generate_config(PPO_CONFIG)
    # Run PPO using env_manager
    _check_environment_trains(
        simple_env_factory(0, []),
        trainer_config,
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.99 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def test_subprocess_failing_step(num_envs):
    """An exception raised during env.step should propagate out of training."""

    def failing_step_env_factory(_worker_id, _config):
        return UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )

    env_manager = SubprocessEnvManager(
        failing_step_env_factory, EngineConfig.default_config()
    )
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
def test_subprocess_env_endtoend(num_envs):
    """End-to-end: PPO should train on a simple env through the subprocess manager."""

    def simple_env_factory(worker_id, config):
        return SimpleEnvironment(["1D"], use_discrete=True)

    env_manager = SubprocessEnvManager(
        simple_env_factory, EngineConfig.default_config(), num_envs
    )
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(
        val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
    )
    env_manager.close()
def test_advance(self, mock_create_worker, training_behaviors_mock, step_mock):
    """process_steps should forward experiences to the AgentManager and adopt the
    latest policy pulled from the policy queue."""
    brain_name = "testbrain"
    action_info_dict = {brain_name: MagicMock()}
    mock_create_worker.side_effect = create_worker_mock
    env_manager = SubprocessEnvManager(
        mock_env_factory, EngineConfig.default_config(), 3
    )
    training_behaviors_mock.return_value = [brain_name]
    agent_manager_mock = mock.Mock()
    mock_policy = mock.Mock()
    # Two queued policies, then the queue reports empty.
    agent_manager_mock.policy_queue.get_nowait.side_effect = [
        mock_policy,
        mock_policy,
        AgentManagerQueue.Empty(),
    ]
    env_manager.set_agent_manager(brain_name, agent_manager_mock)
    step_info_dict = {brain_name: (Mock(), Mock())}
    env_stats = {
        "averaged": (1.0, StatsAggregationMethod.AVERAGE),
        "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
    }
    step_info = EnvironmentStep(step_info_dict, 0, action_info_dict, env_stats)
    step_mock.return_value = [step_info]
    env_manager.process_steps(env_manager.get_steps())
    # Test add_experiences
    env_manager._step.assert_called_once()
    agent_manager_mock.add_experiences.assert_called_once_with(
        step_info.current_all_step_result[brain_name][0],
        step_info.current_all_step_result[brain_name][1],
        0,
        step_info.brain_name_to_action_info[brain_name],
    )
    # Test policy queue
    assert env_manager.policies[brain_name] == mock_policy
    assert agent_manager_mock.policy == mock_policy