def test_subprocess_env_endtoend_default_config(num_envs):
    """End-to-end check that PPO trains through a SubprocessEnvManager
    using the default trainer config, then verifies the final rewards
    reported by the debug stats writer.

    NOTE(review): this test originally shared the name
    ``test_subprocess_env_endtoend`` with the test defined directly below
    it, so the later ``def`` shadowed this one and pytest never collected
    it. Renamed so both tests actually run.
    """
    # Assumes a module-level `simple_env_factory` exists — TODO confirm,
    # it is not visible in this chunk.
    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    trainer_config = generate_config(PPO_CONFIG)
    try:
        # Run PPO using env_manager
        _check_environment_trains(
            simple_env_factory(0, []),
            trainer_config,
            env_manager=env_manager,
            success_threshold=None,
        )
        # Note we can't check the env's rewards directly (since they're in
        # separate processes) so we check the StatsReporter's debug stat
        # writer's last reward.
        assert isinstance(StatsReporter.writers[0], DebugWriter)
        assert all(val > 0.99
                   for val in StatsReporter.writers[0].get_last_rewards().values())
    finally:
        # Always tear down the worker subprocesses, even if training or an
        # assertion fails — otherwise the child processes leak.
        env_manager.close()
def test_subprocess_env_endtoend(num_envs):
    """Train PPO end-to-end through a SubprocessEnvManager (with a raised
    max_steps budget) and check the last rewards seen by the debug stats
    writer."""

    def make_simple_env(worker_id, config):
        # worker_id/config are required by the env-factory signature but unused.
        return SimpleEnvironment(["1D"], use_discrete=True)

    manager = SubprocessEnvManager(
        make_simple_env, EngineConfig.default_config(), num_envs
    )
    config = generate_config(PPO_CONFIG, override_vals={"max_steps": 5000})
    # Drive PPO training through the subprocess env manager.
    _check_environment_trains(
        make_simple_env(0, []),
        config,
        env_manager=manager,
        success_threshold=None,
    )
    # The env rewards can't be inspected directly (the envs live in separate
    # processes), so instead look at the StatsReporter's debug stat writer's
    # last recorded reward per behavior.
    debug_writer = StatsReporter.writers[0]
    assert isinstance(debug_writer, DebugWriter)
    assert all(
        reward > 0.7 for reward in debug_writer.get_last_rewards().values()
    )
    manager.close()