def test_exclude_include_saved_params(model_class):
    """
    Test if exclude and include parameters of save() work

    :param model_class: (BaseAlgorithm) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnvBox(10)])

    # create model, set verbose to 2, which is not the default value
    model = model_class('MlpPolicy',
                        env,
                        policy_kwargs=dict(net_arch=[16]),
                        verbose=2)

    # Check if exclude works
    model.save("test_save.zip", exclude=["verbose"])
    del model
    model = model_class.load("test_save")
    # check if verbose was not saved
    assert model.verbose != 2

    # set verbose to something different than the default
    model.verbose = 2
    # Check if include works
    model.save("test_save.zip", exclude=["verbose"], include=["verbose"])
    del model
    model = model_class.load("test_save")
    assert model.verbose == 2

    # clear file from os
    os.remove("test_save.zip")
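# Note: the model_class argument of the tests in this listing is supplied by
# pytest parametrization. A minimal sketch of how that typically looks, assuming
# the standard stable_baselines3 imports (the exact model list is illustrative):
import pytest
from stable_baselines3 import A2C, PPO, SAC, TD3

@pytest.mark.parametrize("model_class", [A2C, PPO, SAC, TD3])
def test_exclude_include_saved_params(model_class):
    ...  # body as defined above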
def test_continuous(model_class):
    env = IdentityEnvBox(eps=0.5)

    n_steps = {
        A2C: 3500,
        PPO: 3000,
        SAC: 700,
        TD3: 500,
        DDPG: 500
    }[model_class]

    kwargs = dict(policy_kwargs=dict(net_arch=[64, 64]), seed=0, gamma=0.95)
    if model_class in [TD3]:
        n_actions = 1
        action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                         sigma=0.1 * np.ones(n_actions))
        kwargs["action_noise"] = action_noise

    model = model_class("MlpPolicy", env, **kwargs).learn(n_steps)

    evaluate_policy(model,
                    env,
                    n_eval_episodes=20,
                    reward_threshold=90,
                    warn=False)
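# evaluate_policy above is used only for its reward_threshold assertion. A hedged
# standalone sketch of its return values, assuming the SB3 helper from
# stable_baselines3.common.evaluation and a trained `model`/`env` as in the test:
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=20, warn=False)
print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")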
def test_identity_continuous(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return the observation as the action)

    :param model_class: (BaseAlgorithm) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnvBox(eps=0.5)])

    if model_class in [DDPG, TD3]:
        n_actions = 1
        action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                         sigma=0.1 * np.ones(n_actions))
    else:
        action_noise = None

    model = model_class("MlpPolicy",
                        env,
                        gamma=0.1,
                        seed=0,
                        action_noise=action_noise,
                        buffer_size=int(1e6))
    model.learn(total_timesteps=20000)

    evaluate_policy(model, env, n_eval_episodes=20, reward_threshold=90)
    # Free memory
    del model, env
def select_env(model_class: BaseAlgorithm) -> gym.Env:
    """
    Selects an environment with the correct action space, as DQN only supports discrete action spaces
    """
    if model_class == DQN:
        return IdentityEnv(10)
    else:
        return IdentityEnvBox(10)
def select_env(model_class: BaseAlgorithm) -> gym.Env:
    """
    Selects an environment with the correct action space, as QRDQN, DQNClipped and DQNReg only support discrete action spaces
    """
    if model_class in {QRDQN, DQNReg, DQNClipped}:
        return IdentityEnv(10)
    else:
        return IdentityEnvBox(10)
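# Hedged usage sketch of the select_env helpers above: pick the identity env
# matching the algorithm's action space, then train briefly (assuming SAC is
# importable from stable_baselines3; the timestep budget is illustrative):
from stable_baselines3 import SAC

env = select_env(SAC)  # continuous actions -> IdentityEnvBox
model = SAC("MlpPolicy", env).learn(total_timesteps=100)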
def test_common_failures_reset():
    """
    Test that common failure cases of the `reset` method are caught
    """
    env = IdentityEnvBox()
    # Return an observation that does not match the observation_space
    check_reset_assert_error(env, np.ones((3,)))
    # The observation is not a numpy array
    check_reset_assert_error(env, 1)

    # Return not only the observation
    check_reset_assert_error(env, (env.observation_space.sample(), False))
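# check_reset_assert_error is not shown in this listing. A plausible minimal
# sketch, assuming it monkey-patches reset() and relies on SB3's env checker
# to raise an AssertionError (helper name and behaviour are assumptions here):
import pytest
from stable_baselines3.common.env_checker import check_env

def check_reset_assert_error(env, new_reset_return):
    """Patch reset() so it returns `new_reset_return` and expect check_env to fail."""
    def wrong_reset():
        return new_reset_return

    env.reset = wrong_reset
    with pytest.raises(AssertionError):
        check_env(env)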
def test_set_env(model_class):
    """
    Test if the set_env function works correctly

    :param model_class: (BaseAlgorithm) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnvBox(10)])
    env2 = DummyVecEnv([lambda: IdentityEnvBox(10)])
    env3 = IdentityEnvBox(10)

    # create model
    model = model_class('MlpPolicy', env, policy_kwargs=dict(net_arch=[16]))
    # learn
    model.learn(total_timesteps=1000, eval_freq=500)

    # change env
    model.set_env(env2)
    # learn again
    model.learn(total_timesteps=1000, eval_freq=500)

    # change env test wrapping
    model.set_env(env3)
    # learn again
    model.learn(total_timesteps=1000, eval_freq=500)
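# A minimal sketch of the wrapping behaviour exercised with env3 above: passing
# a plain gym env to set_env should wrap it into a VecEnv automatically
# (assuming SB3's default wrapping; PPO and CartPole-v1 are illustrative):
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecEnv

model = PPO("MlpPolicy", gym.make("CartPole-v1"))
model.set_env(gym.make("CartPole-v1"))
assert isinstance(model.get_env(), VecEnv)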
def test_common_failures_step():
    """
    Test that common failure cases of the `step` method are caught
    """
    env = IdentityEnvBox()

    # Wrong shape for the observation
    check_step_assert_error(env, (np.ones((4,)), 1.0, False, {}))
    # Obs is not a numpy array
    check_step_assert_error(env, (1, 1.0, False, {}))

    # Return a wrong reward
    check_step_assert_error(env, (env.observation_space.sample(), np.ones(1), False, {}))

    # Info dict is not returned
    check_step_assert_error(env, (env.observation_space.sample(), 0.0, False))

    # Done is not a boolean
    check_step_assert_error(env, (env.observation_space.sample(), 0.0, 3.0, {}))
    check_step_assert_error(env, (env.observation_space.sample(), 0.0, 1, {}))
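# check_step_assert_error is likewise not shown; an analogous sketch under the
# same assumptions as the check_reset_assert_error sketch above:
import pytest
from stable_baselines3.common.env_checker import check_env

def check_step_assert_error(env, new_step_return):
    """Patch step() so it returns `new_step_return` and expect check_env to fail."""
    def wrong_step(_action):
        return new_step_return

    env.step = wrong_step
    with pytest.raises(AssertionError):
        check_env(env)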
def test_save_load(model_class):
    """
    Test if 'save' and 'load' save and load the model correctly,
    and if 'load_parameters' and 'get_policy_parameters' work correctly.

    .. warning:: this does not test the loading of optimizer parameters

    :param model_class: (BaseAlgorithm) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnvBox(10)])

    # create model
    model = model_class('MlpPolicy',
                        env,
                        policy_kwargs=dict(net_arch=[16]),
                        verbose=1)
    model.learn(total_timesteps=500, eval_freq=250)

    env.reset()
    observations = np.concatenate(
        [env.step(env.action_space.sample())[0] for _ in range(10)], axis=0)

    # Get dictionary of current parameters
    params = deepcopy(model.policy.state_dict())

    # Modify all parameters to be random values
    random_params = dict((param_name, th.rand_like(param))
                         for param_name, param in params.items())

    # Update model parameters with the new random values
    model.policy.load_state_dict(random_params)

    new_params = model.policy.state_dict()
    # Check that all params are different now
    for k in params:
        assert not th.allclose(
            params[k], new_params[k]), "Parameters did not change as expected."

    params = new_params

    # get selected actions
    selected_actions, _ = model.predict(observations, deterministic=True)

    # Check
    model.save("test_save.zip")
    del model
    model = model_class.load("test_save", env=env)

    # check if params are still the same after load
    new_params = model.policy.state_dict()

    # Check that all params are the same as before save load procedure now
    for key in params:
        assert th.allclose(
            params[key], new_params[key]
        ), "Model parameters not the same after save and load."

    # check if model still selects the same actions
    new_selected_actions, _ = model.predict(observations, deterministic=True)
    assert np.allclose(selected_actions, new_selected_actions, 1e-4)

    # check if learn still works
    model.learn(total_timesteps=1000, eval_freq=500)

    # clear file from os
    os.remove("test_save.zip")
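# Note: save()/load() above cover the model parameters and attributes, but for
# off-policy algorithms the replay buffer is persisted separately. A hedged
# sketch, assuming SB3's save_replay_buffer/load_replay_buffer API (the
# environment id and file name are illustrative):
from stable_baselines3 import SAC

model = SAC("MlpPolicy", "Pendulum-v1").learn(total_timesteps=100)
model.save_replay_buffer("replay_buffer.pkl")
model.load_replay_buffer("replay_buffer.pkl")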
def test_save_load_policy(model_class, policy_str):
    """
    Test saving and loading policy only.

    :param model_class: (BaseAlgorithm) A RL model
    :param policy_str: (str) Name of the policy.
    """
    kwargs = {}
    if policy_str == 'MlpPolicy':
        env = IdentityEnvBox(10)
    else:
        if model_class in [SAC, TD3]:
            # Avoid memory error when using replay buffer
            # Reduce the size of the features
            kwargs = dict(buffer_size=250)
        env = FakeImageEnv(screen_height=40,
                           screen_width=40,
                           n_channels=2,
                           discrete=False)

    env = DummyVecEnv([lambda: env])

    # create model
    model = model_class(policy_str,
                        env,
                        policy_kwargs=dict(net_arch=[16]),
                        verbose=1,
                        **kwargs)
    model.learn(total_timesteps=500, eval_freq=250)

    env.reset()
    observations = np.concatenate(
        [env.step(env.action_space.sample())[0] for _ in range(10)], axis=0)

    policy = model.policy
    policy_class = policy.__class__
    actor, actor_class = None, None
    if model_class in [SAC, TD3]:
        actor = policy.actor
        actor_class = actor.__class__

    # Get dictionary of current parameters
    params = deepcopy(policy.state_dict())

    # Modify all parameters to be random values
    random_params = dict((param_name, th.rand_like(param))
                         for param_name, param in params.items())

    # Update model parameters with the new random values
    policy.load_state_dict(random_params)

    new_params = policy.state_dict()
    # Check that all params are different now
    for k in params:
        assert not th.allclose(
            params[k], new_params[k]), "Parameters did not change as expected."

    params = new_params

    # get selected actions
    selected_actions, _ = policy.predict(observations, deterministic=True)
    # Should also work with the actor only
    if actor is not None:
        selected_actions_actor, _ = actor.predict(observations,
                                                  deterministic=True)

    # Save and load policy
    policy.save("./logs/policy.pkl")
    # Save and load actor
    if actor is not None:
        actor.save("./logs/actor.pkl")

    del policy, actor

    policy = policy_class.load("./logs/policy.pkl")
    if actor_class is not None:
        actor = actor_class.load("./logs/actor.pkl")

    # check if params are still the same after load
    new_params = policy.state_dict()

    # Check that all params are the same as before save load procedure now
    for key in params:
        assert th.allclose(
            params[key], new_params[key]
        ), "Policy parameters not the same after save and load."

    # check if model still selects the same actions
    new_selected_actions, _ = policy.predict(observations, deterministic=True)
    assert np.allclose(selected_actions, new_selected_actions, 1e-4)

    if actor_class is not None:
        new_selected_actions_actor, _ = actor.predict(observations,
                                                      deterministic=True)
        assert np.allclose(selected_actions_actor, new_selected_actions_actor,
                           1e-4)
        assert np.allclose(selected_actions_actor, new_selected_actions, 1e-4)

    # clear file from os
    os.remove("./logs/policy.pkl")
    if actor_class is not None:
        os.remove("./logs/actor.pkl")