Example #1
def test_recurrent_sac(use_discrete):
    step_size = 0.5 if use_discrete else 0.2
    env = MemoryEnvironment([BRAIN_NAME],
                            use_discrete=use_discrete,
                            step_size=step_size)
    new_networksettings = attr.evolve(
        SAC_TF_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16,
                                              sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=128,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=5000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
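These listings are excerpts from ML-Agents' trainer test suite, so each function body assumes scaffolding the excerpt omits: BRAIN_NAME, MemoryEnvironment, the *_CONFIG constants, and the check_environment_trains helpers are defined in the surrounding test module, and the bare function parameters are filled in by pytest parametrization. A minimal sketch of what Example #1 presumably sits under (the decorator values and the BRAIN_NAME value are assumptions inferred from the code, not verbatim from the suite):

import pytest

BRAIN_NAME = "1D"  # assumed behavior name; the real test module defines its own

# Assumed parametrization: run the test once with a discrete and once with a
# continuous action space.
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
    ...  # body as in Example #1 above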
Example #2
def test_recurrent_poca(action_sizes, is_multiagent):
    if is_multiagent:
        # This is not a recurrent environment, just check if LSTM doesn't crash
        env = MultiAgentEnvironment([BRAIN_NAME],
                                    action_sizes=action_sizes,
                                    num_agents=2)
    else:
        # Actually test LSTM here
        env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    new_network_settings = attr.evolve(
        POCA_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        POCA_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        POCA_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=500 if is_multiagent else 6000,
    )
    check_environment_trains(env, {BRAIN_NAME: config},
                             success_threshold=None if is_multiagent else 0.9)
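As above, action_sizes and is_multiagent are supplied by pytest parametrization. In the simple test environments, action_sizes is a (continuous, discrete) pair: (0, 1) is discrete-only, (1, 0) is continuous-only, and (1, 1), used by the hybrid examples below, requests one action of each kind. A sketch of the decorators this test presumably carries (the parameter values are assumptions inferred from the body):

import pytest

# Assumed parametrization: one multi-agent pass (a two-agent LSTM smoke test)
# and one single-agent pass that actually exercises the memory environment.
@pytest.mark.parametrize("is_multiagent", [True, False])
# (continuous, discrete) action counts.
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_poca(action_sizes, is_multiagent):
    ...  # body as in Example #2 above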
Example #3
def test_recurrent_ppo(use_discrete):
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    override_vals = {
        "max_steps": 5000,
        "batch_size": 64,
        "buffer_size": 128,
        "learning_rate": 1e-3,
        "use_recurrent": True,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.9)
Example #4
def test_recurrent_sac(use_discrete):
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    override_vals = {
        "batch_size": 64,
        "use_recurrent": True,
        "max_steps": 3000,
        "learning_rate": 1e-3,
        "buffer_init_steps": 500,
    }
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
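Examples #3 and #4 use an older, dict-based configuration API: overrides are gathered in a plain dict and merged into the base config by generate_config. Note that the result is passed to _check_environment_trains directly, without the {BRAIN_NAME: config} wrapping the attrs-based examples do by hand, so the helper presumably also takes care of behavior-name keying. A hypothetical, simplified version of the merge step (an illustration under those assumptions, not the library's actual implementation):

import copy
from typing import Any, Dict

def generate_config(base_config: Dict[str, Any],
                    override_vals: Dict[str, Any]) -> Dict[str, Any]:
    # Deep-copy the shared base config so it is never mutated across tests,
    # then layer the per-test overrides on top.
    config = copy.deepcopy(base_config)
    config.update(override_vals)
    return config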
Example #5
def test_recurrent_ppo(use_discrete):
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    new_network_settings = attr.evolve(
        PPO_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_CONFIG.hyperparameters, learning_rate=1.0e-3, batch_size=64, buffer_size=128
    )
    config = attr.evolve(
        PPO_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=5000,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #6
def test_hybrid_recurrent_ppo():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=512,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=3000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #7
def test_hybrid_recurrent_sac():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=4000,
    )
    check_environment_trains(env, {BRAIN_NAME: config})
Example #8
def test_recurrent_ppo(action_sizes):
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    new_network_settings = attr.evolve(
        PPO_TF_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TF_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=5000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
Example #9
def test_recurrent_sac(use_discrete):
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    new_networksettings = attr.evolve(
        SAC_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16,
                                              sequence_length=32),
    )
    new_hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters,
        batch_size=64,
        learning_rate=1e-3,
        buffer_init_steps=500,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=5000,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
Example #10
def test_recurrent_sac(action_sizes):
    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
    )
    new_networksettings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=3e-4,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=4000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1337)
Example #11
def test_recurrent_sac(action_sizes):
    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment([BRAIN_NAME],
                            action_sizes=action_sizes,
                            step_size=step_size)
    new_networksettings = attr.evolve(
        SAC_TF_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=128,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_networksettings,
        max_steps=4000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
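The attrs-based examples all lean on attr.evolve, which returns a new instance of an attrs class with the given fields replaced; because the *_CONFIG constants are module-level objects shared across tests, this keeps each test's tweaks isolated instead of mutating the shared config. A self-contained illustration with a toy settings class (the class itself is invented for demonstration):

import attr

@attr.s(auto_attribs=True, frozen=True)
class ToyHyperparams:
    batch_size: int = 32
    learning_rate: float = 3.0e-4

BASE = ToyHyperparams()
tuned = attr.evolve(BASE, batch_size=64, learning_rate=1.0e-3)

assert BASE.batch_size == 32   # the shared constant is untouched
assert tuned.batch_size == 64  # the copy carries the overrides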