Example #1
def test_can_save_ppo_to_memory(RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)

    assess_file_has_been_saved_on_disk_and_not_on_ram(hook, save_path)
    os.remove(save_path)
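Note: the helper assess_file_has_been_saved_on_disk_and_not_on_ram is not part of the snippet above. A minimal sketch of what such a check could look like, assuming (as Example #6 suggests) that an AgentHook created with a save_path serializes the agent to that file and drops its in-memory 'agent' attribute:

import os

def assess_file_has_been_saved_on_disk_and_not_on_ram(hook, save_path):
    # The serialized agent should exist (and be non-empty) on disk...
    assert os.path.exists(save_path)
    assert os.path.getsize(save_path) > 0
    # ...while the hook should no longer keep the agent in memory.
    assert not hasattr(hook, 'agent')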
Example #2
def test_ppo_rnn_can_take_actions(RPSTask, ppo_rnn_config_dict):
    env = RPSTask.env
    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    number_of_actions = 30
    for i in range(number_of_actions):
        # assuming that the first observation corresponds to the observation space of this agent
        random_observation = env.observation_space.sample()[0]
        a = agent.take_action(random_observation)
        observation, rewards, done, info = env.step([a, a])
Example #3
def test_can_hook_ppo_agent_using_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    assert all(
        map(lambda param: param.is_cuda, agent.algorithm.model.parameters()))
    hook = AgentHook(agent)

    compare_against_expected_agenthook(agent, hook, AgentType.PPO,
                                       [hook.agent.algorithm.model])
Example #4
def test_can_load_ppo_from_agenthook_disabling_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)

    retrieved_agent = AgentHook.unhook(hook, use_cuda=False)
    model = retrieved_agent.algorithm.model
    assert all(map(lambda param: not param.is_cuda, model.parameters()))
Example #5
def test_can_unhook_ppo_agenthook_with_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    assert all(
        map(lambda param: param.is_cuda, agent.algorithm.model.parameters()))
    hook = AgentHook(agent)

    retrieved_agent = AgentHook.unhook(hook)

    compare_against_expected_retrieved_agent(agent, retrieved_agent,
                                             [retrieved_agent.algorithm.model])
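Note: compare_against_expected_retrieved_agent is a helper from the surrounding test module and is not shown here. A hedged sketch of the kind of round-trip check it might perform, assuming the unhooked agent should keep its training mode and identical model parameters:

import torch

def compare_against_expected_retrieved_agent(agent, retrieved_agent, models):
    # The agent retrieved from the hook should still be in the same training mode...
    assert agent.training == retrieved_agent.training
    # ...and each retrieved model's parameters should match the original agent's.
    for model in models:
        for p_original, p_retrieved in zip(agent.algorithm.model.parameters(),
                                           model.parameters()):
            assert torch.equal(p_original.cpu(), p_retrieved.cpu())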
Example #6
def test_can_load_ppo_from_agenthook_with_cuda(RPSTask, ppo_config_dict):
    ppo_config_dict['use_cuda'] = True
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'PPO')
    save_path = '/tmp/test_save.agent'
    hook = AgentHook(agent, save_path=save_path)

    assert not hasattr(hook, 'agent')

    retrieved_agent = AgentHook.unhook(hook)
    model_list = [retrieved_agent.algorithm.model]
    assert_model_parameters_are_cuda_tensors(model_list)
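Note: assert_model_parameters_are_cuda_tensors mirrors the inline checks of Examples #3 and #5. A minimal sketch:

def assert_model_parameters_are_cuda_tensors(model_list):
    # Every parameter of every listed model should live on a CUDA device.
    for model in model_list:
        assert all(param.is_cuda for param in model.parameters())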
Example #7
def test_ppo_can_solve_multi_env_cartpole(CartPoleTask, ppo_config_dict):
    agent = build_PPO_Agent(CartPoleTask, ppo_config_dict, 'PPO-CartPole-Test')
    agent.state_preprocessing = batch_vector_observation  # Required for multiactor

    from functools import reduce
    from torch.utils.tensorboard import SummaryWriter
    regym.rl_algorithms.PPO.ppo_loss.summary_writer = SummaryWriter('ppo_test_tensorboard')

    test_trajectories = multiactor_task_test(CartPoleTask, agent, train_episodes=5000, test_episodes=100)

    max_traj_len = 200
    solved_threshold = 100
    total_test_trajectory_len = reduce(lambda acc, t: acc + len(t),
                                       test_trajectories, 0)
    assert total_test_trajectory_len / len(test_trajectories) >= solved_threshold
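Note: batch_vector_observation is the state-preprocessing function referenced above but not defined in the snippet. A rough sketch of the idea, assuming it stacks the per-actor vector observations into a single float tensor the model can consume as a batch:

import numpy as np
import torch

def batch_vector_observation(observations):
    # Stack a list of per-actor vector observations into a [num_actors, obs_dim] tensor.
    return torch.tensor(np.stack(observations), dtype=torch.float32)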
Example #8
def test_learns_to_beat_rock_in_RPS_rnn(RPSTask, ppo_rnn_config_dict):
    '''
    Test used to make sure that the agent is 'learning' by training a best response
    against an opponent that only plays rock in Rock-Paper-Scissors,
    i.e. starting from a random policy it learns to play only (or mostly) paper.
    '''
    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    agent.state_preprocessing = flatten_and_turn_into_single_element_batch
    assert agent.training
    learn_against_fix_opponent(agent, fixed_opponent=rockAgent,
                               agent_position=0, # Doesn't matter in RPS
                               task=RPSTask,
                               training_episodes=250,
                               benchmark_every_n_episodes=0,
                               test_episodes=50,
                               reward_tolerance=1.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')
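Note: rockAgent and learn_against_fix_opponent come from the surrounding test utilities and are not shown here. As an illustration, a fixed opponent that always plays rock could be as simple as the hypothetical class below; action 0 encoding 'rock' is an assumption, take_action matches the interface seen in Example #2, and handle_experience is a hypothetical no-op learning hook:

class FixedActionAgent:
    '''Opponent that always plays the same action (e.g. 0 == rock in RPS).'''
    def __init__(self, action, name='RockAgent'):
        self.action = action
        self.name = name
        self.training = False

    def take_action(self, observation):
        return self.action

    def handle_experience(self, *args):
        pass  # A fixed opponent does not learn.

rockAgent = FixedActionAgent(action=0)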
Example #9
def test_learns_to_beat_rock_in_RPS_rnn(RPSTask, ppo_rnn_config_dict):
    '''
    Test used to make sure that the agent is 'learning' by training a best response
    against an opponent that only plays rock in Rock-Paper-Scissors,
    i.e. starting from a random policy it learns to play only (or mostly) paper.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    agent = build_PPO_Agent(RPSTask, ppo_rnn_config_dict, 'RNN_PPO')
    assert agent.training
    learn_against_fix_opponent(
        agent,
        fixed_opponent=rockAgent,
        agent_position=0,  # Doesn't matter in RPS
        task=RPSTask,
        total_episodes=500,
        training_percentage=0.9,
        reward_tolerance=1.,
        maximum_average_reward=10.0,
        evaluation_method='cumulative')
Example #10
def test_mlp_architecture_learns_to_beat_kuhn_poker(KuhnTask, ppo_config_dict):
    build_agent_func = lambda: build_PPO_Agent(KuhnTask, ppo_config_dict, 'PPO-MLP')
    play_kuhn_poker_all_positions_all_fixed_agents(build_agent_func)
Example #11
def test_ppo_rnn_can_take_actions(KuhnTask, ppo_rnn_config_dict):
    agent = build_PPO_Agent(KuhnTask, ppo_rnn_config_dict, 'RNN_PPO')
    agent.state_preprocessing = flatten_and_turn_into_single_element_batch
    act_in_task_env(KuhnTask, agent)
Example #12
def test_ppo_rnn_can_take_actions(KuhnTask, ppo_rnn_config_dict):
    agent = build_PPO_Agent(KuhnTask, ppo_rnn_config_dict, 'RNN_PPO')
    act_in_task_env(KuhnTask, agent)
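Note: act_in_task_env is the small driver used by the last two examples and is not defined in the snippets. A hedged sketch, assuming it simply resets the task's environment and steps it with the agent's actions until the episode ends (the real helper may handle multi-agent turn order differently):

def act_in_task_env(task, agent):
    env = task.env
    observation = env.reset()
    done = False
    while not done:
        action = agent.take_action(observation)
        observation, reward, done, info = env.step(action)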