def test_simple_asymm_ghost_fails(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
    )
    # This config should fail because the team that is not learning, once both teams
    # have reached max step, should be executing the initial, untrained policy.
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=0.0,
        save_steps=5000,
        swap_steps=5000,
        team_change=2000,
    )
    config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2000)
    _check_environment_trains(
        env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
    )
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
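# The tests in this section take arguments such as use_discrete from pytest
# parametrization rather than from fixtures of the same name. A minimal sketch
# of the decorator a test like the one above would carry (the value list is an
# assumption, not shown in this excerpt):
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_asymm_ghost_fails(use_discrete):
#     ...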
def test_gail_visual_ppo(simple_record, action_sizes):
    demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        action_sizes=action_sizes,
        step_size=0.2,
    )
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    hyperparams = attr.evolve(PPO_TF_CONFIG.hyperparameters, learning_rate=3e-4)
    config = attr.evolve(
        PPO_TF_CONFIG,
        reward_signals=reward_signals,
        hyperparameters=hyperparams,
        behavioral_cloning=bc_settings,
        max_steps=1000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
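# simple_record above is a pytest fixture that records a demonstration from the
# simple environment and returns the path to the resulting demo file, which is
# then consumed by both the GAIL reward signal and behavioral cloning. This
# description is inferred from usage in this excerpt; the fixture's definition
# is not shown here.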
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
    )
    new_hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters,
        batch_size=16,
        learning_rate=3e-4,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=100,
    )
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=(0, 1),
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_TF_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
    )
    new_hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=16,
        learning_rate=3e-4,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=200,
        framework=FrameworkType.TENSORFLOW,
    )
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
def test_2d_ppo(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
    )
    config = generate_config(PPO_CONFIG)
    _check_environment_trains(env, config)
def test_simple_asymm_ghost_fails(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
    )
    # This config should fail because the team that is not learning, once both teams
    # have reached max step, should be executing the initial, untrained policy.
    override_vals = {
        "max_steps": 2000,
        "self_play": {
            "play_against_latest_model_ratio": 0.0,
            "save_steps": 5000,
            "swap_steps": 5000,
            "team_change": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    config[brain_name_opp] = config[BRAIN_NAME]
    _check_environment_trains(env, config, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
def test_gail_visual_ppo(simple_record, use_discrete):
    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        use_discrete=use_discrete,
        step_size=0.2,
    )
    override_vals = {
        "max_steps": 500,
        "learning_rate": 3.0e-4,
        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
        "reward_signals": {
            "gail": {
                "strength": 1.0,
                "gamma": 0.99,
                "encoding_size": 32,
                "demo_path": demo_path,
            }
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.9)
def test_2d_sac(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    new_hyperparams = attr.evolve(SAC_CONFIG.hyperparameters, buffer_init_steps=2000)
    config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
def test_gail_visual_sac(simple_record, use_discrete):
    demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        use_discrete=use_discrete,
        step_size=0.2,
    )
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
    )
    config = attr.evolve(
        SAC_CONFIG,
        reward_signals=reward_signals,
        hyperparameters=hyperparams,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
def test_2d_sac(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    override_vals = {"buffer_init_steps": 2000, "max_steps": 4000}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.8)
def test_hybrid_visual_ppo(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
def test_2d_sac(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
def test_2d_ppo(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    new_hyperparams = attr.evolve(
        PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
    )
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_simple_ghost(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
    )
    config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_simple_metacurriculum(curriculum_brain_name):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=False)
    curriculum_config = json.loads(dummy_curriculum_json_str)
    mc = MetaCurriculum({curriculum_brain_name: curriculum_config})
    trainer_config = yaml.safe_load(TRAINER_CONFIG)
    _check_environment_trains(
        env, trainer_config, meta_curriculum=mc, success_threshold=None
    )
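# dummy_curriculum_json_str and TRAINER_CONFIG above are module-level string
# constants holding a curriculum definition and a YAML trainer configuration;
# they are parsed at test time rather than loaded from disk. This is inferred
# from usage in this excerpt; their definitions are not shown here.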
def test_visual_sac(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    override_vals = {"batch_size": 16, "learning_rate": 3e-4}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
def test_visual_ppo(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams)
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_var_len_obs_ppo(num_vis, num_vector, num_var_len, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_vis,
        num_vector=num_vector,
        num_var_len=num_var_len,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
def test_simple_ghost(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_latest_model_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)
def test_visual_sac(num_visual, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
    )
    config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
def test_simple_ghost(action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
    )
    config = attr.evolve(
        PPO_TF_CONFIG,
        self_play=self_play_settings,
        max_steps=2500,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
def test_gail(simple_record, use_discrete, trainer_config):
    demo_path = simple_record(use_discrete)
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    config = attr.evolve(
        trainer_config,
        reward_signals=reward_signals,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
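# test_gail above takes trainer_config as a parameter so the same body can
# exercise GAIL with more than one trainer. A hedged sketch of the decorator
# it would carry (the exact value list is an assumption, not shown in this
# excerpt):
#
# @pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])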
def test_simple_asymm_ghost(use_discrete):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0,
        save_steps=10000,
        swap_steps=10000,
        team_change=400,
    )
    config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=4000)
    _check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
def test_2d_ppo(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
    new_hyperparams = attr.evolve(
        PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640
    )
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=new_hyperparams,
        max_steps=10000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_hybrid_visual_sac(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=128,
        learning_rate=3.0e-4,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=3000
    )
    check_environment_trains(env, {BRAIN_NAME: config})
def test_hybrid_sac(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=256,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
    )
    # A fixed training seed keeps this short run deterministic.
    check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
    )
def test_hybrid_visual_ppo(num_visual, training_seed):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        batch_size=64,
        buffer_size=1024,
        learning_rate=1e-4,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=8000
    )
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=training_seed)
def test_visual_ppo(num_visual, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_TF_CONFIG.hyperparameters, learning_rate=3.0e-4)
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=new_hyperparams,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_visual_sac(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
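# In the newer tests above, action_sizes appears to be a
# (continuous_size, discrete_size) tuple: (1, 0) continuous-only, (0, 1)
# discrete-only, and (1, 1) the mixed action space used by the test_hybrid_*
# cases. This interpretation is inferred from how the tuple is used in this
# excerpt, not stated in it.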