def test_simple_metacurriculum(curriculum_brain_name):
    """Smoke-test that training runs with a MetaCurriculum attached.

    Builds a continuous-action 1D environment, wraps the dummy curriculum
    JSON in a MetaCurriculum keyed by *curriculum_brain_name*, and runs
    training with no success threshold (completion only, no reward check).
    """
    environment = Simple1DEnvironment([BRAIN_NAME], use_discrete=False)
    parsed_curriculum = json.loads(dummy_curriculum_json_str)
    meta = MetaCurriculum({curriculum_brain_name: parsed_curriculum})
    _check_environment_trains(
        environment, TRAINER_CONFIG, meta_curriculum=meta, success_threshold=None
    )
def test_visual_sac(num_visual, use_discrete):
    """Train SAC on an environment with only visual observations.

    Vector observations are disabled (num_vector=0); batch size and
    learning rate are overridden on top of the base SAC config.
    """
    visual_env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
    )
    overrides = {"batch_size": 16, "learning_rate": 3e-4}
    sac_config = generate_config(SAC_CONFIG, overrides)
    _check_environment_trains(visual_env, sac_config)
def test_visual_ppo(num_visual, use_discrete):
    """Train PPO on an environment with only visual observations.

    Vector observations are disabled (num_vector=0); the learning rate
    is overridden on top of the base PPO config.
    """
    visual_env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
    )
    overrides = {"learning_rate": 3.0e-4}
    ppo_config = generate_config(PPO_CONFIG, overrides)
    _check_environment_trains(visual_env, ppo_config)
def test_simple_ghost_fails(use_discrete):
    """Check that self-play with GHOST_CONFIG_FAIL trains only one side.

    Runs two-team self-play with the known-failing ghost config, then
    asserts the final rewards split around the threshold: at least one
    team ends above 0.99 and at least one ends below it.
    """
    two_team_env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    # success_threshold=None: run to completion without asserting rewards here;
    # the reward split is checked explicitly below.
    _check_environment_trains(two_team_env, GHOST_CONFIG_FAIL, success_threshold=None)
    success_threshold = 0.99
    final_rewards = [
        default_reward_processor(team_rewards)
        for team_rewards in two_team_env.final_rewards.values()
    ]
    some_succeeded = any(r > success_threshold for r in final_rewards)
    some_failed = any(r < success_threshold for r in final_rewards)
    assert some_succeeded and some_failed
def test_simple_ghost(use_discrete):
    """Smoke-test that two-team self-play (ghost) training succeeds.

    Overrides the base PPO config with a short max_steps and a self_play
    section that always plays against the current self.
    """
    two_team_env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"],
        use_discrete=use_discrete,
    )
    self_play_section = {
        "play_against_current_self_ratio": 1.0,
        "save_steps": 2000,
        "swap_steps": 2000,
    }
    overrides = {"max_steps": 2500, "self_play": self_play_section}
    ghost_config = generate_config(PPO_CONFIG, overrides)
    _check_environment_trains(two_team_env, ghost_config)
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    """Train PPO with a non-default visual encoder on 36x36x3 observations.

    Uses a larger step size and a short max_steps; the success threshold
    is relaxed to 0.5 because the step budget is small for these encoders.
    """
    visual_env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    overrides = {
        "learning_rate": 3.0e-4,
        "vis_encode_type": vis_encode_type,
        "max_steps": 500,
        "summary_freq": 100,
    }
    ppo_config = generate_config(PPO_CONFIG, overrides)
    # The number of steps is pretty small for these encoders
    _check_environment_trains(visual_env, ppo_config, success_threshold=0.5)
def test_simple_ghost_fails(use_discrete):
    """Check that a late-swap self-play config leaves one team untrained.

    This config should fail because the ghosted policy is never swapped
    with a competent policy: the swap occurs only after max step is
    reached (swap_steps=4000 > max_steps=2500). The final rewards must
    therefore straddle the threshold — one team above 0.9, one below.
    """
    two_team_env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    self_play_section = {
        "play_against_current_self_ratio": 1.0,
        "save_steps": 2000,
        "swap_steps": 4000,
    }
    overrides = {"max_steps": 2500, "self_play": self_play_section}
    failing_config = generate_config(PPO_CONFIG, overrides)
    # success_threshold=None: run to completion; the split is asserted below.
    _check_environment_trains(two_team_env, failing_config, success_threshold=None)
    success_threshold = 0.9
    final_rewards = [
        default_reward_processor(team_rewards)
        for team_rewards in two_team_env.final_rewards.values()
    ]
    some_succeeded = any(r > success_threshold for r in final_rewards)
    some_failed = any(r < success_threshold for r in final_rewards)
    assert some_succeeded and some_failed
def test_simple_sac(use_discrete):
    """Smoke-test SAC training on the basic 1D environment."""
    basic_env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    sac_config = generate_config(SAC_CONFIG)
    _check_environment_trains(basic_env, sac_config)
def test_simple_ghost(use_discrete):
    """Smoke-test two-team self-play (ghost) training with GHOST_CONFIG_PASS."""
    two_team_env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"],
        use_discrete=use_discrete,
    )
    _check_environment_trains(two_team_env, GHOST_CONFIG_PASS)
def test_simple_sac(use_discrete):
    """Smoke-test SAC training on the basic 1D environment using SAC_CONFIG directly."""
    basic_env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    _check_environment_trains(basic_env, SAC_CONFIG)
def simple_env_factory(worker_id, config):
    """Environment-factory callback: return a fresh discrete Simple1DEnvironment.

    Both *worker_id* and *config* are accepted to satisfy the factory
    signature but are not used by this implementation.
    """
    return Simple1DEnvironment(["1D"], use_discrete=True)