# Shared imports and constants for the tests below. The import paths assume
# stable-baselines 2.x, which these helpers (DummyVecEnv, IdentityEnv,
# set_global_seeds, evaluate_policy) appear to come from.
import os
import json
import zipfile
from io import BytesIO

import numpy as np
import pytest

from stable_baselines import A2C, ACKTR, DQN, PPO1, PPO2, TRPO
from stable_baselines.common import set_global_seeds
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.identity_env import IdentityEnv
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv

# Assumed values: the original constants are defined elsewhere in the test suite.
N_TRIALS = 100
N_EVAL_EPISODES = 20


def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    reward_sum = 0
    set_global_seeds(0)
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
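# Several tests in this file look learning functions up in LEARN_FUNC_DICT,
# which is defined elsewhere in the test suite. A minimal sketch of its
# assumed shape (the model names and timestep budgets are illustrative, not
# the originals); note that `.learn()` returns the model itself:
LEARN_FUNC_DICT = {
    'a2c': lambda env: A2C(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
    'ppo2': lambda env: PPO2(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
    'dqn': lambda env: DQN(policy="MlpPolicy", env=env).learn(total_timesteps=1000),
}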
def test_identity_discrete(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)
    # evaluate_policy raises an AssertionError if the mean reward over the
    # evaluation episodes falls below reward_threshold
    evaluate_policy(model, env, n_eval_episodes=20, reward_threshold=90)

    obs = env.reset()
    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    # Free memory
    del model, env
def test_identity(model_class):
    """
    Test the Discrete environment vectorisation detection

    :param model_class: (BaseRLModel) the RL model
    """
    check_shape(lambda: IdentityEnv(dim=10), model_class, (), (1,))
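# A sketch of what the `check_shape` helper used above is assumed to do:
# build the model on a vectorized env, then verify that `predict` returns
# actions of the expected shape both for a raw (unvectorized) env and for a
# DummyVecEnv. The name and signature match the call above; the body is an
# assumption, mirroring the explicit version of this test further below.
def check_shape(make_env, model_class, unvectorized_shape, vectorized_shape):
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([make_env]))

    for env, expected_shape in [(make_env(), unvectorized_shape),
                                (DummyVecEnv([make_env]), vectorized_shape)]:
        obs = env.reset()
        action, _ = model.predict(obs)
        # unvectorized envs should yield scalar actions, vectorized ones (1,)
        assert np.array(action).shape == expected_shape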
def test_save_custom_objects(request, model_class):
    """
    Test feeding custom_objects in model.load(...) function
    """
    # Skip DQN (not an actor-critic policy)
    if model_class == DQN:
        return

    model_fname = './test_model_{}.zip'.format(request.node.name)

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # Create and save model with default MLP policy
        model = model_class(policy=MlpPolicy, env=env)
        model.save(model_fname)
        del model, env

        # Corrupt "policy" serialization in the file
        data_file = zipfile.ZipFile(model_fname, "r")
        # Load all data (can't just update one file in the archive)
        parameter_list = data_file.read("parameter_list")
        parameters = data_file.read("parameters")
        class_data = json.loads(data_file.read("data").decode())
        data_file.close()

        # Corrupt serialization of the "policy"
        class_data["policy"][":serialized:"] = (
            "Adding this should break serialization" +
            class_data["policy"][":serialized:"]
        )

        # And dump everything back to the model file
        data_file = zipfile.ZipFile(model_fname, "w")
        data_file.writestr("data", json.dumps(class_data))
        data_file.writestr("parameter_list", parameter_list)
        data_file.writestr("parameters", parameters)
        data_file.close()

        # Try loading the model. This should
        # result in an error
        with pytest.raises(RuntimeError):
            model = model_class.load(model_fname)

        # Load model with custom objects ("custom" MlpPolicy)
        # and it should work fine.
        # Note: We could load model with just vanilla
        # MlpPolicy, too.
        model = model_class.load(model_fname, custom_objects={"policy": CustomMlpPolicy})
        # Make sure we loaded custom MLP policy
        assert model.policy == CustomMlpPolicy
        del model
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)
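# `CustomMlpPolicy` above is assumed to follow the custom-policy pattern from
# the stable-baselines docs: a FeedForwardPolicy subclass with a custom
# network architecture. The net_arch below is an illustrative choice, not the
# original definition.
from stable_baselines.common.policies import FeedForwardPolicy

class CustomMlpPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        # a single shared hidden layer of 16 units on top of MLP features
        super(CustomMlpPolicy, self).__init__(*args, **kwargs,
                                              net_arch=[16],
                                              feature_extraction="mlp")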
def test_identity(model_class):
    """
    Test the Discrete environment vectorisation detection

    :param model_class: (BaseRLModel) the RL model
    """
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([lambda: IdentityEnv(dim=10)]))

    env0 = IdentityEnv(dim=10)
    env1 = DummyVecEnv([lambda: IdentityEnv(dim=10)])

    n_trials = 100
    for env, expected_shape in [(env0, ()), (env1, (1,))]:
        obs = env.reset()
        for _ in range(n_trials):
            action, _ = model.predict(obs)
            assert np.array(action).shape == expected_shape
            obs, _, _, _ = env.step(action)

    # Free memory
    del model, env
def test_identity(learn_func):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param learn_func: (lambda (Gym Environment): A2CPolicy) the policy generator
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = learn_func(env)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
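# For reference, a minimal sketch of the IdentityEnv these tests revolve
# around (the library class lives in stable_baselines.common.identity_env;
# this re-implementation is illustrative, hence the distinct name): the
# observation is a state sampled from Discrete(dim), and the reward is 1
# exactly when the action repeats the current observation, which is why a
# perfect policy collects close to 1.0 reward per step above.
import gym

class IdentityEnvSketch(gym.Env):
    def __init__(self, dim, ep_length=100):
        self.action_space = gym.spaces.Discrete(dim)
        self.observation_space = gym.spaces.Discrete(dim)
        self.ep_length = ep_length
        self.current_step = 0
        self.state = None

    def reset(self):
        self.current_step = 0
        self.state = self.observation_space.sample()
        return self.state

    def step(self, action):
        # reward the agent for echoing the observation back as its action
        reward = 1 if self.state == action else 0
        self.state = self.observation_space.sample()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}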
def test_identity(model_name):
    """
    Test that deterministic prediction is consistent: with deterministic=True
    the model must return the same action (with the same shape) on every call

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    obs = env.reset()
    action_shape = model.predict(obs, deterministic=False)[0].shape
    action, _ = model.predict(obs, deterministic=True)
    assert action.shape == action_shape
    for _ in range(n_trials):
        new_action = model.predict(obs, deterministic=True)[0]
        assert action == model.predict(obs, deterministic=True)[0]
        assert new_action.shape == action_shape

    # Free memory
    del model, env
def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)

    n_trials = 1000
    reward_sum = 0
    set_global_seeds(0)
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
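# `set_global_seeds` (from stable_baselines.common) is what makes the reward
# accumulation above repeatable: it seeds the global RNGs in one call,
# roughly (a sketch from memory, not the library source):
#
#     def set_global_seeds(seed):
#         random.seed(seed)
#         np.random.seed(seed)
#         tf.set_random_seed(seed)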
def test_model_manipulation(model_policy):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_policy: (BaseRLModel, Object) A model, policy pair
    """
    model_class, policy = model_policy

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # check the env is deterministic
        action = [env.action_space.sample()]
        set_global_seeds(0)
        obs = env.step(action)[0]
        for _ in range(N_TRIALS):
            set_global_seeds(0)
            assert obs == env.step(action)[0], \
                "Error: environment tested not deterministic with the same seed"

        # create and train
        model = model_class(policy=policy, env=env)
        model.learn(total_timesteps=50000)

        # predict and measure the accumulated reward
        acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        # acc_reward is a (1,)-shaped array from the vectorized env;
        # sum() collapses it to a scalar
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        model.save("./test_model")

        del model, env

        # loading
        model = model_class.load("./test_model")

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=1000)

        # validate no reset post learning
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
def test_load_parameters(request, model_class):
    """
    Test if ``load_parameters`` loads given parameters correctly (the model actually changes)
    and that the backwards compatibility with a list of params works

    :param model_class: (BaseRLModel) A RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    # create model
    model = model_class(policy="MlpPolicy", env=env)

    # test action probability for given (obs, action) pair
    env = model.get_env()
    obs = env.reset()
    observations = np.array([obs for _ in range(10)])
    observations = np.squeeze(observations)
    actions = np.array([env.action_space.sample() for _ in range(10)])
    original_actions_probas = model.action_probability(observations, actions=actions)

    # Get dictionary of current parameters
    params = model.get_parameters()
    # Modify all parameters to be random values
    random_params = dict((param_name, np.random.random(size=param.shape))
                         for param_name, param in params.items())
    # Update model parameters with the new random values
    model.load_parameters(random_params)
    # Get new action probas
    new_actions_probas = model.action_probability(observations, actions=actions)
    # Check that at least some action probabilities are different now
    assert not np.any(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities did not change after changing model parameters."
    # Also check that new parameters are there (they should be random_params)
    new_params = model.get_parameters()
    comparisons = [np.all(np.isclose(new_params[key], random_params[key]))
                   for key in random_params.keys()]
    assert all(comparisons), "Parameters of model are not the same as provided ones."

    # Now test the backwards compatibility with params being a list instead of a dict.
    # Get the ordering of parameters.
    tf_param_list = model.get_parameter_list()
    # Make random parameters negative to make sure the results should be different from
    # previous random values
    random_param_list = [-np.random.random(size=tf_param.shape) for tf_param in tf_param_list]
    model.load_parameters(random_param_list)

    # Compare results against the previous load
    new_actions_probas_list = model.action_probability(observations, actions=actions)
    assert not np.any(np.isclose(new_actions_probas, new_actions_probas_list)), \
        "Action probabilities did not change after changing model parameters (list)."

    # Test file/file-like object loading for load_parameters.
    # Save whatever is stored in model now, assign random parameters,
    # load parameters from file with load_parameters and check if original probabilities
    # are restored
    original_actions_probas = model.action_probability(observations, actions=actions)
    model_fname = './test_model_{}.zip'.format(request.node.name)

    try:
        # Save model to a file and file-like buffer
        # (partly copy/paste from test_save)
        model.save(model_fname)
        b_io = BytesIO()
        model.save(b_io)
        model_bytes = b_io.getvalue()
        b_io.close()

        random_params = dict((param_name, np.random.random(size=param.shape))
                             for param_name, param in params.items())
        model.load_parameters(random_params)
        # Previous tests confirm that load_parameters works,
        # so jump straight into testing loading from a file
        model.load_parameters(model_fname)
        new_actions_probas = model.action_probability(observations, actions=actions)
        assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
            "Action probabilities changed after load_parameters from a file."

        # Reset with random parameters again
        model.load_parameters(random_params)
        # Now load from file-like (copy/paste from test_save)
        b_io = BytesIO(model_bytes)
        model.load_parameters(b_io)
        b_io.close()
        new_actions_probas = model.action_probability(observations, actions=actions)
        assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
            "Action probabilities changed after load_parameters from a file-like."
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)

    # Test `exact_match` functionality of load_parameters
    original_actions_probas = model.action_probability(observations, actions=actions)
    # Create dictionary with one variable name missing
    truncated_random_params = dict((param_name, np.random.random(size=param.shape))
                                   for param_name, param in params.items())
    # Remove some element
    _ = truncated_random_params.pop(list(truncated_random_params.keys())[0])
    # With exact_match=True, this should raise an exception
    with pytest.raises(RuntimeError):
        model.load_parameters(truncated_random_params, exact_match=True)
    # Make sure we did not update the model regardless
    new_actions_probas = model.action_probability(observations, actions=actions)
    assert np.all(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities changed after load_parameters raised RuntimeError (exact_match=True)."

    # With exact_match=False, this should be fine
    model.load_parameters(truncated_random_params, exact_match=False)
    # Also check that results changed, again
    new_actions_probas = model.action_probability(observations, actions=actions)
    assert not np.any(np.isclose(original_actions_probas, new_actions_probas)), \
        "Action probabilities did not change after changing model parameters (exact_match=False)."

    del model, env
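# The round-trip contract exercised above, in miniature (parameter names
# depend on the model class; this is a usage sketch, not new test logic):
#
#     params = model.get_parameters()         # OrderedDict: name -> np.ndarray
#     model.load_parameters(params)           # accepts dict, list, path or file-like
#     variables = model.get_parameter_list()  # tf.Variable objects, same order
#
# Passing a bare list works because `get_parameter_list` fixes the ordering,
# which is the backwards compatibility this test checks.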
def test_model_manipulation(model_class, storage_method):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    """
    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env)
        model.learn(total_timesteps=50000, seed=0)

        # predict and measure the accumulated reward
        acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            # Test action probability method
            model.action_probability(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        if storage_method == "path":
            # saving to a path
            model.save("./test_model")
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load("./test_model")
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=100, seed=0)

        # validate no reset post learning
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
def test_model_manipulation(request, model_class, storage_method, store_format):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    :param storage_method: (str) Should file be saved to a file ("path") or to a buffer ("file-like")
    :param store_format: (str) Save format, either "zip" or "cloudpickle".
    """
    # Use postfix ".model" so we can remove the file later
    model_fname = './test_model_{}.model'.format(request.node.name)
    store_as_cloudpickle = store_format == "cloudpickle"

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env)
        model.learn(total_timesteps=50000)

        # predict and measure the accumulated reward
        acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            # Test action probability method
            model.action_probability(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # test action probability for given (obs, action) pair
        env = model.get_env()
        obs = env.reset()
        observations = np.array([obs for _ in range(10)])
        observations = np.squeeze(observations)
        actions = np.array([env.action_space.sample() for _ in range(10)])
        actions_probas = model.action_probability(observations, actions=actions)
        assert actions_probas.shape == (len(actions), 1), actions_probas.shape
        assert actions_probas.min() >= 0, actions_probas.min()
        assert actions_probas.max() <= 1, actions_probas.max()

        # saving
        if storage_method == "path":
            # saving to a path
            model.save(model_fname, cloudpickle=store_as_cloudpickle)
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io, cloudpickle=store_as_cloudpickle)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load(model_fname)
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check that predictions match the ones recorded before saving
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=100)

        # validate no reset post learning
        loaded_acc_reward = 0
        set_global_seeds(0)
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)
def make_env():
    return IdentityEnv(ep_length=1e10, dim=2)
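# The huge ep_length above makes the episode effectively infinite, so `done`
# is never emitted during a short test run. Typical usage (assumed) would be:
#
#     env = DummyVecEnv([make_env])
#     obs = env.reset()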
def test_model_manipulation(request, model_class, storage_method, store_format):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_class: (BaseRLModel) A RL model
    :param storage_method: (str) Should file be saved to a file ("path") or to a buffer ("file-like")
    :param store_format: (str) Save format, either "zip" or "cloudpickle".
    """
    # Use postfix ".model" so we can remove the file later
    model_fname = './test_model_{}.model'.format(request.node.name)
    store_as_cloudpickle = store_format == "cloudpickle"

    kwargs = dict(seed=0, gamma=0.4)
    if model_class in [DQN]:
        kwargs["learning_starts"] = 0
        kwargs["exploration_final_eps"] = 0.05
    if model_class == PPO1:
        kwargs["entcoeff"] = 0.0
        kwargs["optim_batchsize"] = 4
        kwargs["timesteps_per_actorbatch"] = 4
    if model_class in [A2C, ACKTR, PPO2]:
        kwargs["n_steps"] = 4
        kwargs["ent_coef"] = 0.0
    if model_class in [TRPO]:
        kwargs["timesteps_per_batch"] = 4

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env, **kwargs)
        model.learn(total_timesteps=15)

        env.envs[0].action_space.seed(0)
        mean_reward, _ = evaluate_policy(model, env, deterministic=True,
                                         n_eval_episodes=N_EVAL_EPISODES)

        # test action probability for given (obs, action) pair
        env = model.get_env()
        obs = env.reset()
        observations = np.array([env.step([env.action_space.sample()])[0] for _ in range(10)])
        observations = np.squeeze(observations)
        selected_actions, _ = model.predict(observations, deterministic=True)
        actions = np.array([env.action_space.sample() for _ in range(10)])
        actions_probas = model.action_probability(observations, actions=actions)
        assert actions_probas.shape == (len(actions), 1), actions_probas.shape
        assert actions_probas.min() >= 0, actions_probas.min()
        assert actions_probas.max() <= 1, actions_probas.max()

        # saving
        if storage_method == "path":
            # saving to a path
            model.save(model_fname, cloudpickle=store_as_cloudpickle)
        else:
            # saving to a file-like object (BytesIO in this case)
            b_io = BytesIO()
            model.save(b_io, cloudpickle=store_as_cloudpickle)
            model_bytes = b_io.getvalue()
            b_io.close()

        del model, env

        # loading
        if storage_method == "path":
            # loading from path
            model = model_class.load(model_fname)
        else:
            # loading from file-like object (BytesIO in this case)
            b_io = BytesIO(model_bytes)
            model = model_class.load(b_io)
            b_io.close()

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # check if model still selects the same actions
        new_selected_actions, _ = model.predict(observations, deterministic=True)
        assert np.allclose(selected_actions, new_selected_actions, 1e-4)

        # learn post loading
        model.learn(total_timesteps=15)

        # predict new values
        evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES)

        del model, env
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)