def test_can_save_ppo_to_memory(RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)
    assess_file_has_been_saved_on_disk_and_not_on_ram(hook, save_path)
    os.remove(save_path)


def test_ppo_rnn_can_take_actions(RPSTask, ppo_rnn_config_dict):
    env = RPSTask.env
    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    number_of_actions = 30
    for i in range(number_of_actions):
        # Assuming that the first observation corresponds to this agent's observation space
        random_observation = env.observation_space.sample()[0]
        a = agent.take_action(random_observation)
        observation, rewards, done, info = env.step([a, a])


def test_can_hook_ppo_agent_using_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    assert all(map(lambda param: param.is_cuda,
                   agent.algorithm.model.parameters()))
    hook = AgentHook(agent)
    compare_against_expected_agenthook(agent, hook, AgentType.PPO,
                                       [hook.agent.algorithm.model])


def test_can_load_ppo_from_agenthook_disabling_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)
    retrieved_agent = AgentHook.unhook(hook, use_cuda=False)
    model = retrieved_agent.algorithm.model
    assert all(map(lambda param: not param.is_cuda, model.parameters()))


def test_can_unhook_ppo_agenthook_with_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    assert all(map(lambda param: param.is_cuda,
                   agent.algorithm.model.parameters()))
    hook = AgentHook(agent)
    retrieved_agent = AgentHook.unhook(hook)
    compare_against_expected_retrieved_agent(agent, retrieved_agent,
                                             [retrieved_agent.algorithm.model])


def test_can_load_ppo_from_agenthook_with_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)
    assert not hasattr(hook, 'agent')
    retrieved_agent = AgentHook.unhook(hook)
    model_list = [retrieved_agent.algorithm.model]
    assert_model_parameters_are_cuda_tensors(model_list)


def test_ppo_can_solve_multi_env_cartpole(CartPoleTask, ppo_config_dict):
    agent = build_PPO_Agent(CartPoleTask, ppo_config_dict, 'PPO-CartPole-Test')
    agent.state_preprocessing = batch_vector_observation  # Required for multiactor

    from torch.utils.tensorboard import SummaryWriter
    regym.rl_algorithms.PPO.ppo_loss.summary_writer = SummaryWriter('ppo_test_tensorboard')

    test_trajectories = multiactor_task_test(CartPoleTask, agent,
                                             train_episodes=5000,
                                             test_episodes=100)

    max_traj_len = 200
    solved_threshold = 100
    total_test_trajectory_len = reduce(lambda acc, t: acc + len(t), test_trajectories, 0)
    assert total_test_trajectory_len / len(test_trajectories) >= solved_threshold


def test_learns_to_beat_rock_in_RPS_rnn(RPSTask, ppo_rnn_config_dict):
    '''
    Test used to make sure that the agent is 'learning' by learning a best response
    against an agent that only plays rock in rock paper scissors,
    i.e. starting from random play, it learns to play only (or mostly) paper.
    '''
    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    agent.state_preprocessing = flatten_and_turn_into_single_element_batch
    assert agent.training
    learn_against_fix_opponent(agent,
                               fixed_opponent=rockAgent,
                               agent_position=0,  # Doesn't matter in RPS
                               task=RPSTask,
                               training_episodes=250,
                               benchmark_every_n_episodes=0,
                               test_episodes=50,
                               reward_tolerance=1.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')


def test_learns_to_beat_rock_in_RPS_rnn(RPSTask, ppo_rnn_config_dict):
    '''
    Test used to make sure that the agent is 'learning' by learning a best response
    against an agent that only plays rock in rock paper scissors,
    i.e. starting from random play, it learns to play only (or mostly) paper.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    assert agent.training
    learn_against_fix_opponent(agent,
                               fixed_opponent=rockAgent,
                               agent_position=0,  # Doesn't matter in RPS
                               task=RPSTask,
                               total_episodes=500,
                               training_percentage=0.9,
                               reward_tolerance=1.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')


def test_mlp_architecture_learns_to_beat_kuhn_poker(KuhnTask, ppo_config_dict):
    build_agent_func = lambda: build_PPO_Agent(KuhnTask, ppo_config_dict, 'PPO-MLP')
    play_kuhn_poker_all_positions_all_fixed_agents(build_agent_func)


def test_ppo_rnn_can_take_actions(KuhnTask, ppo_rnn_config_dict):
    agent = build_PPO_Agent(KuhnTask, ppo_rnn_config_dict, 'RNN_PPO')
    agent.state_preprocessing = flatten_and_turn_into_single_element_batch
    act_in_task_env(KuhnTask, agent)


def test_ppo_rnn_can_take_actions(KuhnTask, ppo_rnn_config_dict):
    agent = build_PPO_Agent(KuhnTask, ppo_rnn_config_dict, 'RNN_PPO')
    act_in_task_env(KuhnTask, agent)