Code Example #1
File: test_simple_rl.py Project: MXSLaser/ml-agents
def test_recurrent_poca(action_sizes, is_multiagent):
    if is_multiagent:
        # This is not a recurrent environment; just check that the LSTM doesn't crash
        env = MultiAgentEnvironment([BRAIN_NAME],
                                    action_sizes=action_sizes,
                                    num_agents=2)
    else:
        # Actually test LSTM here
        env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    new_network_settings = attr.evolve(
        POCA_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        POCA_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        POCA_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=500 if is_multiagent else 6000,
    )
    check_environment_trains(env, {BRAIN_NAME: config},
                             success_threshold=None if is_multiagent else 0.9)
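All of these examples derive their trainer configuration from a shared base (PPO_TORCH_CONFIG, SAC_TORCH_CONFIG, or POCA_TORCH_CONFIG) via attr.evolve, which copies a frozen attrs instance while overriding selected fields. Below is a minimal, self-contained sketch of that pattern; the Hyperparameters class is purely illustrative, not the actual ml-agents settings class:

import attr

@attr.s(auto_attribs=True, frozen=True)
class Hyperparameters:
    learning_rate: float = 3.0e-4
    batch_size: int = 256
    buffer_size: int = 10240

base = Hyperparameters()
# evolve() leaves `base` untouched and returns a new instance with the overrides applied.
tuned = attr.evolve(base, batch_size=64, buffer_size=1024)
assert base.batch_size == 256 and tuned.batch_size == 64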
Code Example #2
File: test_simple_rl.py Project: vogulcan/ml-agents
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=(0, 1),
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        vis_encode_type=EncoderType(vis_encode_type))
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=16,
        learning_rate=3e-4,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=100,
    )
    # The number of steps is pretty small for these encoders
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
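The vis_encode_type and num_visual arguments are not bound inside the function; in the upstream test file they presumably come from pytest parametrization that the excerpt omits. A hedged sketch of what such decorators could look like, with illustrative parameter values:

import pytest

@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["simple", "nature_cnn", "resnet", "match3"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
    ...  # body as in the example above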
Code Example #3
File: test_simple_rl.py Project: vogulcan/ml-agents
def test_gail_visual_sac(simple_record, action_sizes):
    demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=1,
        num_vector=0,
        action_sizes=action_sizes,
        step_size=0.2,
    )
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32,
                                            demo_path=demo_path)
    }
    hyperparams = attr.evolve(SAC_TORCH_CONFIG.hyperparameters,
                              learning_rate=3e-4,
                              batch_size=16)
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        reward_signals=reward_signals,
        hyperparameters=hyperparams,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Code Example #4
File: test_simple_rl.py Project: vogulcan/ml-agents
def test_simple_asymm_ghost_fails(action_sizes):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"],
        action_sizes=action_sizes)
    # This config should fail because once both teams have reached max steps, the team that
    # is not learning should be executing the initial, untrained policy.
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=0.0,
        save_steps=5000,
        swap_steps=5000,
        team_change=2000,
    )
    config = attr.evolve(PPO_TORCH_CONFIG,
                         self_play=self_play_settings,
                         max_steps=3000)
    check_environment_trains(env, {
        BRAIN_NAME: config,
        brain_name_opp: config
    },
                             success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards)
        for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold
               for reward in processed_rewards) and any(
                   reward < success_threshold for reward in processed_rewards)
Code Example #5
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_simple_ghost(action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
    )
    config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #6
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_2d_ppo(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #7
File: test_hybrid.py Project: vogulcan/ml-agents
def test_hybrid_visual_ppo(num_visual):
    env = SimpleEnvironment([BRAIN_NAME],
                            num_visual=num_visual,
                            num_vector=0,
                            action_sizes=(1, 1))
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #8
File: test_simple_rl.py Project: MXSLaser/ml-agents
def test_visual_poca(num_visual):
    env = MultiAgentEnvironment([BRAIN_NAME],
                                action_sizes=(0, 1),
                                num_agents=2,
                                num_visual=num_visual)
    new_hyperparams = attr.evolve(POCA_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(POCA_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #9
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_2d_sac(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
Code Example #10
File: test_simple_rl.py Project: zcemycl/ml-agents
def test_visual_ppo(num_visual, use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #11
def test_var_len_obs_ppo(num_vis, num_vector, num_var_len, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_vis,
        num_vector=num_vector,
        num_var_len=num_var_len,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #12
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Code Example #13
File: test_simple_rl.py Project: vogulcan/ml-agents
def test_visual_sac(num_visual, action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    new_hyperparams = attr.evolve(SAC_TORCH_CONFIG.hyperparameters,
                                  batch_size=16,
                                  learning_rate=3e-4)
    config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #14
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_simple_asymm_ghost(action_sizes):
    # Make opponent for asymmetric case
    brain_name_opp = BRAIN_NAME + "Opp"
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
    )
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0,
        save_steps=10000,
        swap_steps=10000,
        team_change=400,
    )
    config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=4000)
    check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
Code Example #15
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_gail(simple_record, action_sizes, trainer_config):
    demo_path = simple_record(action_sizes)
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
    bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
    reward_signals = {
        RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
    }
    config = attr.evolve(
        trainer_config,
        reward_signals=reward_signals,
        behavioral_cloning=bc_settings,
        max_steps=500,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
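test_gail additionally takes a trainer_config argument, so the same body can be exercised against more than one base configuration; presumably it is parametrized over the PPO and SAC Torch configs, although the decorator is not shown in the excerpt. A hedged sketch of that assumption:

import pytest

@pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
def test_gail(simple_record, action_sizes, trainer_config):
    ...  # body as in the example above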
Code Example #16
def test_hybrid_visual_sac(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=128,
        learning_rate=3.0e-4,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=3000
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #17
File: test_hybrid.py Project: zereyak13/ml-agents
def test_hybrid_sac(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)

    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        buffer_size=50000,
        batch_size=256,
        buffer_init_steps=0,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
    )
    check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
    )
Code Example #18
File: test_hybrid.py Project: SancySwachitha/Drone
def test_hybrid_visual_ppo(num_visual, training_seed):
    env = SimpleEnvironment([BRAIN_NAME],
                            num_visual=num_visual,
                            num_vector=0,
                            action_sizes=(1, 1))
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        batch_size=64,
        buffer_size=1024,
        learning_rate=1e-4,
    )
    config = attr.evolve(PPO_TORCH_CONFIG,
                         hyperparameters=new_hyperparams,
                         max_steps=8000)
    check_environment_trains(env, {BRAIN_NAME: config},
                             training_seed=training_seed)
Code Example #19
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(
            ["1D"], use_discrete=True, to_raise=CustomTestOnlyException
        )
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory, RunOptions())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
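CustomTestOnlyException and UnexpectedExceptionEnvironment are defined elsewhere in the test module; presumably the former is a sentinel error type and the latter raises it from its step logic, so the test can assert that exactly this exception is routed back through SubprocessEnvManager. A minimal sketch of the sentinel class, with an assumed comment:

class CustomTestOnlyException(Exception):
    # A test-only error type: catching it can never accidentally mask a real failure.
    pass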
Code Example #20
File: test_simple_rl.py Project: zereyak13/ml-agents
def test_simple_ghost_fails(action_sizes):
    env = SimpleEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
    )
    # This config should fail because the ghosted policy is never swapped with a competent policy.
    # Swap occurs after max step is reached.
    self_play_settings = SelfPlaySettings(
        play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
    )
    config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.9
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
Code Example #21
File: test_simple_rl.py Project: MXSLaser/ml-agents
def test_var_len_obs_and_goal_ppo(num_vis, num_vector, num_var_len,
                                  action_sizes, conditioning_type):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=action_sizes,
        num_visual=num_vis,
        num_vector=num_vector,
        num_var_len=num_var_len,
        step_size=0.2,
        goal_indices=[0],
    )
    new_network = attr.evolve(POCA_TORCH_CONFIG.network_settings,
                              goal_conditioning_type=conditioning_type)
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(PPO_TORCH_CONFIG,
                         hyperparameters=new_hyperparams,
                         network_settings=new_network)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #22
File: test_hybrid.py Project: vogulcan/ml-agents
def test_hybrid_recurrent_ppo():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=512,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=3000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Code Example #23
def test_hybrid_recurrent_sac():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=4000,
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #24
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], use_discrete=True)
        return env

    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(val > 0.7
               for val in StatsReporter.writers[0].get_last_rewards().values())
    env_manager.close()
Code Example #25
File: test_simple_rl.py Project: donlee90/ml-agents
def test_var_len_obs_and_goal_poca(num_vis, num_vector, num_var_len,
                                   conditioning_type):
    env = MultiAgentEnvironment(
        [BRAIN_NAME],
        action_sizes=(0, 1),
        num_visual=num_vis,
        num_vector=num_vector,
        num_var_len=num_var_len,
        step_size=0.2,
        num_agents=2,
        goal_indices=[0],
    )
    new_network = attr.evolve(POCA_TORCH_CONFIG.network_settings,
                              goal_conditioning_type=conditioning_type)
    new_hyperparams = attr.evolve(POCA_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(
        POCA_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network,
        max_steps=5000,
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #26
def test_recurrent_sac(use_discrete):
    step_size = 0.2 if use_discrete else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
    )
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=2000,
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #27
File: test_simple_rl.py Project: zcemycl/ml-agents
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
    )
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        vis_encode_type=EncoderType(vis_encode_type))
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  learning_rate=3.0e-4)
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=900,
        summary_freq=100,
    )
    # The number of steps is pretty small for these encoders
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
Code Example #28
def test_recurrent_sac(action_sizes):
    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
    )
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=3e-4,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=4000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1337)
Code Example #29
File: test_simple_rl.py Project: zcemycl/ml-agents
def test_simple_ppo(use_discrete):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config})
Code Example #30
File: test_simple_rl.py Project: vogulcan/ml-agents
def test_simple_ppo(action_sizes):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config})
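These functions are pytest tests and are normally run by pytest rather than called directly. A hedged usage sketch follows; the module path is an assumption based on the ml-agents repository layout and may differ in the forks listed above:

import pytest

# Run only the simple PPO smoke test from this module.
pytest.main(["-k", "test_simple_ppo",
             "ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py"])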