# Shared imports and constants for the tests below. The import paths assume
# stable-baselines 2.x, which these helpers (DummyVecEnv, IdentityEnv,
# set_global_seeds, evaluate_policy) appear to come from.
import os
import json
import zipfile
from io import BytesIO

import numpy as np
import pytest

from stable_baselines import A2C, ACKTR, DQN, PPO1, PPO2, TRPO
from stable_baselines.common import set_global_seeds
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.identity_env import IdentityEnv
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv

# Assumed values: the original constants are defined elsewhere in the test suite.
N_TRIALS = 100
N_EVAL_EPISODES = 20


def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    reward_sum = 0
    set_global_seeds(0)
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
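# Several tests in this file look learning functions up in LEARN_FUNC_DICT,
# which is defined elsewhere in the test suite. A minimal sketch of its
# assumed shape (the model names and timestep budgets are illustrative, not
# the originals); note that `.learn()` returns the model itself:
LEARN_FUNC_DICT = {
    'a2c': lambda env: A2C(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
    'ppo2': lambda env: PPO2(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
    'dqn': lambda env: DQN(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
}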
def test_identity_discrete(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)
    # evaluate_policy raises an AssertionError if the mean reward over the
    # evaluation episodes falls below reward_threshold
    evaluate_policy(model, env, n_eval_episodes=20, reward_threshold=90)

    obs = env.reset()
    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    # Free memory
    del model, env
def test_identity(model_class):
    """
    Test the Discrete environment vectorisation detection

    :param model_class: (BaseRLModel) the RL model
    """
    check_shape(lambda: IdentityEnv(dim=10), model_class, (), (1,))
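# A sketch of what the `check_shape` helper used above is assumed to do:
# build the model on a vectorized env, then verify that `predict` returns
# actions of the expected shape both for a raw (unvectorized) env and for a
# DummyVecEnv. The name and signature match the call above; the body is an
# assumption, mirroring the explicit version of this test further below.
def check_shape(make_env, model_class, unvectorized_shape, vectorized_shape):
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([make_env]))

    for env, expected_shape in [(make_env(), unvectorized_shape),
                                (DummyVecEnv([make_env]), vectorized_shape)]:
        obs = env.reset()
        action, _ = model.predict(obs)
        # unvectorized envs should yield scalar actions, vectorized ones (1,)
        assert np.array(action).shape == expected_shape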
def test_save_custom_objects(request, model_class):
    """
    Test feeding custom_objects in model.load(...) function
    """
    # Skip DQN (not an actor-critic policy)
    if model_class == DQN:
        return

    model_fname = './test_model_{}.zip'.format(request.node.name)

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # Create and save model with default MLP policy
        model = model_class(policy=MlpPolicy, env=env)
        model.save(model_fname)
        del model, env

        # Corrupt "policy" serialization in the file
        data_file = zipfile.ZipFile(model_fname, "r")
        # Load all data (can't just update one file in the archive)
        parameter_list = data_file.read("parameter_list")
        parameters = data_file.read("parameters")
        class_data = json.loads(data_file.read("data").decode())
        data_file.close()

        # Corrupt serialization of the "policy"
        class_data["policy"][":serialized:"] = (
            "Adding this should break serialization" +
            class_data["policy"][":serialized:"]
        )

        # And dump everything back to the model file
        data_file = zipfile.ZipFile(model_fname, "w")
        data_file.writestr("data", json.dumps(class_data))
        data_file.writestr("parameter_list", parameter_list)
        data_file.writestr("parameters", parameters)
        data_file.close()

        # Try loading the model. This should
        # result in an error
        with pytest.raises(RuntimeError):
            model = model_class.load(model_fname)

        # Load model with custom objects ("custom" MlpPolicy)
        # and it should work fine.
        # Note: We could load model with just vanilla
        # MlpPolicy, too.
        model = model_class.load(model_fname, custom_objects={"policy": CustomMlpPolicy})
        # Make sure we loaded custom MLP policy
        assert model.policy == CustomMlpPolicy
        del model
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)
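# `CustomMlpPolicy` above is assumed to follow the custom-policy pattern from
# the stable-baselines docs: a FeedForwardPolicy subclass with a custom
# network architecture. The net_arch below is an illustrative choice, not the
# original definition.
from stable_baselines.common.policies import FeedForwardPolicy

class CustomMlpPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        # a single shared hidden layer of 16 units on top of MLP features
        super(CustomMlpPolicy, self).__init__(*args, **kwargs,
                                              net_arch=[16],
                                              feature_extraction="mlp")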
def test_identity(model_class):
    """
    Test the Discrete environment vectorisation detection

    :param model_class: (BaseRLModel) the RL model
    """
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([lambda: IdentityEnv(dim=10)]))

    env0 = IdentityEnv(dim=10)
    env1 = DummyVecEnv([lambda: IdentityEnv(dim=10)])

    n_trials = 100
    for env, expected_shape in [(env0, ()), (env1, (1,))]:
        obs = env.reset()
        for _ in range(n_trials):
            action, _ = model.predict(obs)
            assert np.array(action).shape == expected_shape
            obs, _, _, _ = env.step(action)

    # Free memory
    del model, env
def test_identity(learn_func):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param learn_func: (lambda (Gym Environment): A2CPolicy) the policy generator
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = learn_func(env)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
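# For reference, a minimal sketch of the IdentityEnv these tests revolve
# around (the library class lives in stable_baselines.common.identity_env;
# this re-implementation is illustrative, hence the distinct name): the
# observation is a state sampled from Discrete(dim), and the reward is 1
# exactly when the action repeats the current observation, which is why a
# perfect policy collects close to 1.0 reward per step above.
import gym

class IdentityEnvSketch(gym.Env):
    def __init__(self, dim, ep_length=100):
        self.action_space = gym.spaces.Discrete(dim)
        self.observation_space = gym.spaces.Discrete(dim)
        self.ep_length = ep_length
        self.current_step = 0
        self.state = None

    def reset(self):
        self.current_step = 0
        self.state = self.observation_space.sample()
        return self.state

    def step(self, action):
        # reward the agent for echoing the observation back as its action
        reward = 1 if self.state == action else 0
        self.state = self.observation_space.sample()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}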
def test_identity(model_name):
    """
    Test that deterministic prediction is consistent: with deterministic=True
    the model must return the same action (with the same shape) on every call

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    obs = env.reset()
    action_shape = model.predict(obs, deterministic=False)[0].shape
    action, _ = model.predict(obs, deterministic=True)
    assert action.shape == action_shape
    for _ in range(n_trials):
        new_action = model.predict(obs, deterministic=True)[0]
        assert action == model.predict(obs, deterministic=True)[0]
        assert new_action.shape == action_shape

    # Free memory
    del model, env
def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    reward_sum = 0
    set_global_seeds(0)
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
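# `set_global_seeds` (from stable_baselines.common) is what makes the reward
# accumulation above repeatable: it seeds the global RNGs in one call,
# roughly (a sketch from memory, not the library source):
#
#     def set_global_seeds(seed):
#         random.seed(seed)
#         np.random.seed(seed)
#         tf.set_random_seed(seed)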
def test_model_manipulation(model_policy):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_policy: (BaseRLModel, Object) A model, policy pair
    """
    model_class, policy = model_policy

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # check the env is deterministic
        action = [env.action_space.sample()]
        set_global_seeds(0)
        obs = env.step(action)[0]
        for _ in range(N_TRIALS):
            set_global_seeds(0)
            assert obs == env.step(action)[0], \
                "Error: environment tested not deterministic with the same seed"

        # create and train
        model = model_class(policy=policy, env=env)
        model.learn(total_timesteps=50000)

        # predict and measure the accumulated reward
        acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        # acc_reward is a (1,)-shaped array from the vectorized env;
        # sum() collapses it to a scalar
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        model.save("./test_model")

        del model, env

        # loading
        model = model_class.load("./test_model")

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=1000)

        # validate no reset post learning
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
def test_load_parameters(request, model_class):
    """
    Test if ``load_parameters`` loads given parameters correctly (the model actually changes)
    and that the backwards compatibility with a list of params works

    :param model_class: (BaseRLModel) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    # create model
    model = model_class(policy="MlpPolicy", env=env)

    # test action probability for given (obs, action) pair
    env = model.get_env()
    obs = env.reset()
    observations = np.array([obs for _ in range(10)])
    observations = np.squeeze(observations)
    actions = np.array([env.action_space.sample() for _ in range(10)])
    original_actions_probas = model.action_probability(observations, actions=actions)

    # Get dictionary of current parameters
    params = model.get_parameters()
    # Modify all parameters to be random values
    random_params = dict((param_name, np.random.random(size=param.shape))
                         for param_name, param in params.items())
    # Update model parameters with the new random values
    model.load_parameters(random_params)
    # Get new action probas
    new_actions_probas = model.action_probability(observations, actions=actions)
    # Check that at least some action probabilities are different now
    assert not np.any(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities did not change after changing model parameters."
    # Also check that new parameters are there (they should be random_params)
    new_params = model.get_parameters()
    comparisons = [np.all(np.isclose(new_params[key], random_params[key]))
                   for key in random_params.keys()]
    assert all(comparisons), "Parameters of model are not the same as provided ones."

    # Now test the backwards compatibility with params being a list instead of a dict.
    # Get the ordering of parameters.
    tf_param_list = model.get_parameter_list()
    # Make random parameters negative to make sure the results should be different from
    # previous random values
    random_param_list = [-np.random.random(size=tf_param.shape) for tf_param in tf_param_list]
    model.load_parameters(random_param_list)

    # Compare results against the previous load
    new_actions_probas_list = model.action_probability(observations, actions=actions)
    assert not np.any(np.isclose(new_actions_probas, new_actions_probas_list)), \
        "Action probabilities did not change after changing model parameters (list)."

    # Test file/file-like object loading for load_parameters.
    # Save whatever is stored in model now, assign random parameters,
    # load parameters from file with load_parameters and check if original probabilities
    # are restored
    original_actions_probas = model.action_probability(observations, actions=actions)
    model_fname = './test_model_{}.zip'.format(request.node.name)

    try:
        # Save model to a file and file-like buffer
        # (partly copy/paste from test_save)
        model.save(model_fname)
        b_io = BytesIO()
        model.save(b_io)
        model_bytes = b_io.getvalue()
        b_io.close()

        random_params = dict((param_name, np.random.random(size=param.shape))
                             for param_name, param in params.items())
        model.load_parameters(random_params)
        # Previous tests confirm that load_parameters works,
        # so jump straight into testing loading from a file
        model.load_parameters(model_fname)
        new_actions_probas = model.action_probability(observations, actions=actions)
        assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
            "Action probabilities changed after load_parameters from a file."

        # Reset with random parameters again
        model.load_parameters(random_params)
        # Now load from file-like (copy/paste from test_save)
        b_io = BytesIO(model_bytes)
        model.load_parameters(b_io)
        b_io.close()
        new_actions_probas = model.action_probability(observations, actions=actions)
        assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
            "Action probabilities changed after load_parameters from a file-like."
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)

    # Test `exact_match` functionality of load_parameters
    original_actions_probas = model.action_probability(observations, actions=actions)
    # Create dictionary with one variable name missing
    truncated_random_params = dict((param_name, np.random.random(size=param.shape))
                                   for param_name, param in params.items())
    # Remove some element
    _ = truncated_random_params.pop(list(truncated_random_params.keys())[0])
    # With exact_match=True, this should raise an exception
    with pytest.raises(RuntimeError):
        model.load_parameters(truncated_random_params, exact_match=True)
    # Make sure we did not update the model regardless
    new_actions_probas = model.action_probability(observations, actions=actions)
    assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities changed after load_parameters raised RuntimeError (exact_match=True)."

    # With exact_match=False, this should be fine
    model.load_parameters(truncated_random_params, exact_match=False)
    # Also check that results changed, again
    new_actions_probas = model.action_probability(observations, actions=actions)
    assert not np.any(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities did not change after changing model parameters (exact_match=False)."

    del model, env
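# The round-trip contract exercised above, in miniature (parameter names
# depend on the model class; this is a usage sketch, not new test logic):
#
#     params = model.get_parameters()         # OrderedDict: name -> np.ndarray
#     model.load_parameters(params)           # accepts dict, list, path or file-like
#     variables = model.get_parameter_list()  # tf.Variable objects, same order
#
# Passing a bare list works because `get_parameter_list` fixes the ordering,
# which is the backwards compatibility this test checks.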
def test_model_manipulation(model_class, storage_method):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    """
    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env)
        model.learn(total_timesteps=50000, seed=0)

        # predict and measure the accumulated reward
        acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            # Test action probability method
            model.action_probability(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        if storage_method == "path":
            # saving to a path
            model.save("./test_model")
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load("./test_model")
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=100, seed=0)

        # validate no reset post learning
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
def test_model_manipulation(request, model_class, storage_method, store_format):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    :param storage_method: (str) Should file be saved to a file ("path") or to a buffer ("file-like")
    :param store_format: (str) Save format, either "zip" or "cloudpickle".
    """
    # Use postfix ".model" so we can remove the file later
    model_fname = './test_model_{}.model'.format(request.node.name)
    store_as_cloudpickle = store_format == "cloudpickle"

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env)
        model.learn(total_timesteps=50000)

        # predict and measure the accumulated reward
        acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            # Test action probability method
            model.action_probability(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # test action probability for given (obs, action) pair
        env = model.get_env()
        obs = env.reset()
        observations = np.array([obs for _ in range(10)])
        observations = np.squeeze(observations)
        actions = np.array([env.action_space.sample() for _ in range(10)])
        actions_probas = model.action_probability(observations, actions=actions)
        assert actions_probas.shape == (len(actions), 1), actions_probas.shape
        assert actions_probas.min() >= 0, actions_probas.min()
        assert actions_probas.max() <= 1, actions_probas.max()

        # saving
        if storage_method == "path":
            # saving to a path
            model.save(model_fname, cloudpickle=store_as_cloudpickle)
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io, cloudpickle=store_as_cloudpickle)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load(model_fname)
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=100)

        # validate no reset post learning
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)
def make_env():
    return IdentityEnv(ep_length=1e10, dim=2)
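# The huge ep_length above makes the episode effectively infinite, so `done`
# is never emitted during a short test run. Typical usage (assumed) would be:
#
#     env = DummyVecEnv([make_env])
#     obs = env.reset()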
def test_model_manipulation(request, model_class, storage_method, store_format):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    :param storage_method: (str) Should file be saved to a file ("path") or to a buffer ("file-like")
    :param store_format: (str) Save format, either "zip" or "cloudpickle".
    """
    # Use postfix ".model" so we can remove the file later
    model_fname = './test_model_{}.model'.format(request.node.name)
    store_as_cloudpickle = store_format == "cloudpickle"

    kwargs = dict(seed=0, gamma=0.4)
    if model_class in [DQN]:
        kwargs["learning_starts"] = 0
        kwargs["exploration_final_eps"] = 0.05
    if model_class == PPO1:
        kwargs["entcoeff"] = 0.0
        kwargs["optim_batchsize"] = 4
        kwargs["timesteps_per_actorbatch"] = 4
    if model_class in [A2C, ACKTR, PPO2]:
        kwargs["n_steps"] = 4
        kwargs["ent_coef"] = 0.0
    if model_class in [TRPO]:
        kwargs["timesteps_per_batch"] = 4

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env, **kwargs)
        model.learn(total_timesteps=15)

        env.envs[0].action_space.seed(0)
        mean_reward, _ = evaluate_policy(model, env, deterministic=True,
                                         n_eval_episodes=N_EVAL_EPISODES)

        # test action probability for given (obs, action) pair
        env = model.get_env()
        obs = env.reset()
        observations = np.array([env.step([env.action_space.sample()])[0] for _ in range(10)])
        observations = np.squeeze(observations)
        selected_actions, _ = model.predict(observations, deterministic=True)
        actions = np.array([env.action_space.sample() for _ in range(10)])
        actions_probas = model.action_probability(observations, actions=actions)
        assert actions_probas.shape == (len(actions), 1), actions_probas.shape
        assert actions_probas.min() >= 0, actions_probas.min()
        assert actions_probas.max() <= 1, actions_probas.max()

        # saving
        if storage_method == "path":
            # saving to a path
            model.save(model_fname, cloudpickle=store_as_cloudpickle)
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io, cloudpickle=store_as_cloudpickle)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load(model_fname)
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check if model still selects the same actions
        new_selected_actions, _ = model.predict(observations, deterministic=True)
        assert np.allclose(selected_actions, new_selected_actions, 1e-4)

        # learn post loading
        model.learn(total_timesteps=15)

        # predict new values
        evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES)

        del model, env
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)