Ejemplo n.º 1
0
 def test_environments_are_created(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     env = SubprocessEnvManager(mock_env_factory,
                                EngineConfig.default_config(), 2)
     # Creates two processes
     env.create_worker.assert_has_calls([
         mock.call(0, env.step_queue, mock_env_factory,
                   EngineConfig.default_config()),
         mock.call(1, env.step_queue, mock_env_factory,
                   EngineConfig.default_config()),
     ])
     self.assertEqual(len(env.env_workers), 2)
Ejemplo n.º 2
0
def test_engine_configuration():
    sender = EngineConfigurationChannel()
    # We use a raw bytes channel to interpred the data
    receiver = RawBytesChannel(sender.channel_id)

    config = EngineConfig.default_config()
    sender.set_configuration(config)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    received_data = receiver.get_and_clear_received_messages()
    assert len(received_data) == 5  # 5 different messages one for each setting

    sent_time_scale = 4.5
    sender.set_configuration_parameters(time_scale=sent_time_scale)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    message.read_int32()
    time_scale = message.read_float32()
    assert time_scale == sent_time_scale

    with pytest.raises(UnitySideChannelException):
        sender.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # try to send data to the EngineConfigurationChannel
        sender.set_configuration_parameters(time_scale=sent_time_scale)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
Ejemplo n.º 3
0
 def test_step_takes_steps_for_all_non_waiting_envs(self,
                                                    mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 3)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse(EnvironmentCommand.STEP, 0,
                             StepResponse(0, None, {})),
         EnvironmentResponse(EnvironmentCommand.STEP, 1,
                             StepResponse(1, None, {})),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     manager.env_workers[0].previous_step = last_steps[0]
     manager.env_workers[1].previous_step = last_steps[1]
     manager.env_workers[2].previous_step = last_steps[2]
     manager.env_workers[2].waiting = True
     manager._take_step = Mock(return_value=step_mock)
     res = manager._step()
     for i, env in enumerate(manager.env_workers):
         if i < 2:
             env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
             manager.step_queue.get_nowait.assert_called()
             # Check that the "last steps" are set to the value returned for each step
             self.assertEqual(
                 manager.env_workers[i].previous_step.
                 current_all_step_result, i)
     assert res == [
         manager.env_workers[0].previous_step,
         manager.env_workers[1].previous_step,
     ]
    def test_advance(self, external_brains_mock, step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
        env_manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 3
        )
        external_brains_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: Mock()}
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
        step_mock.return_value = [step_info]
        env_manager.advance()

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
        env_manager.advance()
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
 def test_step_takes_steps_for_all_non_waiting_envs(self):
     SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
         worker_id, EnvironmentResponse("step", worker_id, worker_id))
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 3)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse("step", 0, StepResponse(0, None)),
         EnvironmentResponse("step", 1, StepResponse(1, None)),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     manager.env_workers[0].previous_step = last_steps[0]
     manager.env_workers[1].previous_step = last_steps[1]
     manager.env_workers[2].previous_step = last_steps[2]
     manager.env_workers[2].waiting = True
     manager._take_step = Mock(return_value=step_mock)
     res = manager.step()
     for i, env in enumerate(manager.env_workers):
         if i < 2:
             env.send.assert_called_with("step", step_mock)
             manager.step_queue.get_nowait.assert_called()
             # Check that the "last steps" are set to the value returned for each step
             self.assertEqual(
                 manager.env_workers[i].previous_step.
                 current_all_brain_info, i)
     assert res == [
         manager.env_workers[0].previous_step,
         manager.env_workers[1].previous_step,
     ]
 def test_reset_passes_reset_params(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 1)
     params = {"test": "params"}
     manager._reset_env(params)
     manager.env_workers[0].send.assert_called_with("reset", (params))
 def test_reset_passes_reset_params(self):
     SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
         worker_id, EnvironmentResponse("reset", worker_id, worker_id))
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 1)
     params = {"test": "params"}
     manager.reset(params)
     manager.env_workers[0].send.assert_called_with("reset", (params))
Ejemplo n.º 8
0
def _make_unity_env(
        env_path: Optional[str] = None,
        port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
        seed: int = -1,
        env_args: Optional[List[str]] = None,
        engine_config: Optional[EngineConfig] = None,
        side_channels: Optional[List[SideChannel]] = None) -> UnityEnvironment:
    """
    Create a UnityEnvironment.
    """
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        launch_string = UnityEnvironment.validate_environment_path(env_path)
        if launch_string is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()

    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    if side_channels is None:
        side_channels = [engine_configuration_channel]
    else:
        side_channels.append(engine_configuration_channel)

    # Find an available port to connect to Unity environment.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=side_channels,
            )
        except UnityWorkerInUseException:
            logger.debug(f"port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
Ejemplo n.º 9
0
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.1)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory,
                                       EngineConfig.default_config(), num_envs)
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
Ejemplo n.º 10
0
    def test_reset_collects_results_from_all_envs(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory,
                                       EngineConfig.default_config(), 4)

        params = {"test": "params"}
        res = manager._reset_env(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with(EnvironmentCommand.RESET, (params))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result,
                i)
        assert res == list(
            map(lambda ew: ew.previous_step, manager.env_workers))
    def test_reset_collects_results_from_all_envs(self):
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id))
        manager = SubprocessEnvManager(mock_env_factory,
                                       EngineConfig.default_config(), 4)

        params = {"test": "params"}
        res = manager.reset(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with("reset", (params))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i)
        assert res == list(
            map(lambda ew: ew.previous_step, manager.env_workers))
def test_subprocess_env_endtoend(num_envs):
    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    trainer_config = generate_config(PPO_CONFIG)
    # Run PPO using env_manager
    _check_environment_trains(
        simple_env_factory(0, []),
        trainer_config,
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(val > 0.99
               for val in StatsReporter.writers[0].get_last_rewards().values())
    env_manager.close()
Ejemplo n.º 13
0
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(["1D"],
                                             use_discrete=True,
                                             to_raise=CustomTestOnlyException)
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory,
                                       EngineConfig.default_config())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
Ejemplo n.º 14
0
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], use_discrete=True)
        return env

    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(val > 0.7
               for val in StatsReporter.writers[0].get_last_rewards().values())
    env_manager.close()
Ejemplo n.º 15
0
    def test_advance(self, mock_create_worker, training_behaviors_mock,
                     step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        mock_create_worker.side_effect = create_worker_mock
        env_manager = SubprocessEnvManager(mock_env_factory,
                                           EngineConfig.default_config(), 3)
        training_behaviors_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.side_effect = [
            mock_policy,
            mock_policy,
            AgentManagerQueue.Empty(),
        ]
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: (Mock(), Mock())}
        env_stats = {
            "averaged": (1.0, StatsAggregationMethod.AVERAGE),
            "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
        }
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict,
                                    env_stats)
        step_mock.return_value = [step_info]
        env_manager.process_steps(env_manager.get_steps())

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name][0],
            step_info.current_all_step_result[brain_name][1],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy