コード例 #1
0
def test_visual_advanced_sac(vis_encode_type, num_visual):
    """Train SAC (TensorFlow) on a visual-only simple environment.

    Builds a discrete-action ``SimpleEnvironment`` with ``num_visual`` visual
    observations and no vector observations, then checks that a shortened SAC
    run reaches the 0.5 success threshold with the requested visual encoder.

    Args:
        vis_encode_type: Encoder name, converted with ``EncoderType(...)``.
        num_visual: Number of visual observations the environment exposes.
    """
    # The "match3" encoder is exercised on a 5x5x5 observation; every other
    # encoder gets a 36x36x3 one.
    if vis_encode_type == "match3":
        obs_shape = (5, 5, 5)
    else:
        obs_shape = (36, 36, 3)

    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=obs_shape,
    )

    # Derive the trainer config from the shared SAC baseline, overriding only
    # the encoder, a few hyperparameters, the step budget and the framework.
    trainer_config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=attr.evolve(
            SAC_TF_CONFIG.hyperparameters,
            batch_size=16,
            learning_rate=3e-4,
            buffer_init_steps=0,
        ),
        network_settings=attr.evolve(
            SAC_TF_CONFIG.network_settings,
            vis_encode_type=EncoderType(vis_encode_type),
        ),
        max_steps=100,
        framework=FrameworkType.TENSORFLOW,
    )

    # The number of steps is pretty small for these encoders
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_config}, success_threshold=0.5
    )
コード例 #2
0
ファイル: test_saver.py プロジェクト: terite/HexChess
def test_load_policy_different_hidden_units(tmp_path, vis_encode_type):
    """Load a checkpoint into a policy built with a different hidden size.

    A policy with ``hidden_units=12`` is checkpointed, then a second policy
    with ``hidden_units=10`` is initialized from the same path with
    ``load=True``. The test asserts that the parameters with more than two
    dimensions differ before the load and are equal after it, while
    parameters whose leading dimension is 12 in the first policy still pair
    with a leading dimension of 10 in the second.

    Args:
        tmp_path: Base directory under which the "runid1" run path is built.
        vis_encode_type: Encoder name, converted with ``EncoderType(...)``.
    """

    def conv_weights(pol):
        # Parameters with more than two dimensions; the assertions below treat
        # these as the convolution weights.
        return [p for p in pol.actor.parameters() if len(p.shape) > 2]

    def assert_hidden_sizes_differ(first, second):
        # Wherever the first policy has a leading dimension of 12, the second
        # must have a leading dimension of 10.
        paired = zip(first.actor.parameters(), second.actor.parameters())
        for param_a, param_b in paired:
            if param_a.shape[0] == 12:
                assert param_b.shape[0] == 10

    run_dir = os.path.join(tmp_path, "runid1")
    saved_step = 2000

    # Build and checkpoint the first policy (hidden_units=12).
    settings_a = TrainerSettings()
    settings_a.network_settings = NetworkSettings(
        hidden_units=12,
        vis_encode_type=EncoderType(vis_encode_type),
    )
    policy = create_policy_mock(settings_a, use_visual=True)
    convs_a = conv_weights(policy)

    saver_a = TorchModelSaver(settings_a, run_dir)
    saver_a.register(policy)
    saver_a.initialize_or_load(policy)
    policy.set_step(saved_step)
    saver_a.save_checkpoint("MockBrain", saved_step)

    # Try load from this path
    settings_b = TrainerSettings()
    settings_b.network_settings = NetworkSettings(
        hidden_units=10,
        vis_encode_type=EncoderType(vis_encode_type),
    )
    saver_b = TorchModelSaver(settings_b, run_dir, load=True)
    policy2 = create_policy_mock(settings_b, use_visual=True)
    convs_b = conv_weights(policy2)

    # Before loading: convolutions differ and so do the layer dimensions.
    for weight_a, weight_b in zip(convs_a, convs_b):
        assert not torch.equal(weight_a, weight_b)
    assert_hidden_sizes_differ(policy, policy2)

    saver_b.register(policy2)
    saver_b.initialize_or_load(policy2)

    # After loading: convolutions match, layer dimensions are still different.
    # convs_b is deliberately not re-collected; the same parameter objects
    # gathered before the load are compared again.
    for weight_a, weight_b in zip(convs_a, convs_b):
        assert torch.equal(weight_a, weight_b)
    assert_hidden_sizes_differ(policy, policy2)
コード例 #3
0
ファイル: test_simple_rl.py プロジェクト: tyohanan/ml-agents
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    """Train PPO on a visual-only simple environment.

    Builds a discrete-action ``SimpleEnvironment`` with ``num_visual`` 36x36x3
    visual observations and no vector observations, then checks that a
    shortened PPO run reaches the 0.5 success threshold with the requested
    visual encoder.

    Args:
        vis_encode_type: Encoder name, converted with ``EncoderType(...)``.
        num_visual: Number of visual observations the environment exposes.
    """
    env = SimpleEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )

    # NOTE(review): the network settings are evolved from SAC_CONFIG rather
    # than PPO_CONFIG. That may be a copy/paste leftover; kept as-is because
    # switching could change the trained network. Confirm the intent.
    trainer_config = attr.evolve(
        PPO_CONFIG,
        hyperparameters=attr.evolve(
            PPO_CONFIG.hyperparameters, learning_rate=3.0e-4
        ),
        network_settings=attr.evolve(
            SAC_CONFIG.network_settings,
            vis_encode_type=EncoderType(vis_encode_type),
        ),
        max_steps=500,
        summary_freq=100,
    )

    # The number of steps is pretty small for these encoders
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_config}, success_threshold=0.5
    )
コード例 #4
0
ファイル: test_simple_rl.py プロジェクト: saleh9292/ml-agents
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    """Train PPO (TensorFlow) on a visual-only simple environment.

    Builds a ``SimpleEnvironment`` with ``action_sizes=(0, 1)``,
    ``num_visual`` visual observations and no vector observations, then
    checks that a shortened PPO run reaches the 0.5 success threshold with
    the requested visual encoder.

    Args:
        vis_encode_type: Encoder name, converted with ``EncoderType(...)``.
        num_visual: Number of visual observations the environment exposes.
    """
    # The "match3" encoder is exercised on a 5x5x5 observation; every other
    # encoder gets a 36x36x3 one.
    if vis_encode_type == "match3":
        obs_shape = (5, 5, 5)
    else:
        obs_shape = (36, 36, 3)

    env = SimpleEnvironment(
        [BRAIN_NAME],
        action_sizes=(0, 1),
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=obs_shape,
    )

    # NOTE(review): the network settings are evolved from SAC_TF_CONFIG rather
    # than PPO_TF_CONFIG. That may be a copy/paste leftover; kept as-is
    # because switching could change the trained network. Confirm the intent.
    trainer_config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=attr.evolve(
            PPO_TF_CONFIG.hyperparameters, learning_rate=3.0e-4
        ),
        network_settings=attr.evolve(
            SAC_TF_CONFIG.network_settings,
            vis_encode_type=EncoderType(vis_encode_type),
        ),
        max_steps=400,
        summary_freq=100,
        framework=FrameworkType.TENSORFLOW,
    )

    # The number of steps is pretty small for these encoders
    _check_environment_trains(
        env, {BRAIN_NAME: trainer_config}, success_threshold=0.5
    )