Example #1
0
def test_dueling_DQN_learns_to_beat_rock_in_RPS(RPSTask, dqn_config_dict):
    '''
    Checks that a Dueling DQN agent "learns" by computing a best response
    against an opponent that always plays rock in Rock-Paper-Scissors:
    starting from random play, it should converge to (mostly) playing paper.
    '''
    import regym
    from torch.utils.tensorboard import SummaryWriter
    from play_against_fixed_opponent import learn_against_fix_opponent

    # Attach a tensorboard writer so DQN loss statistics are logged during the run.
    writer = SummaryWriter('test_tensorboard')
    regym.rl_algorithms.DQN.dqn_loss.summary_writer = writer

    dqn_config_dict['dueling'] = True
    agent = build_DQN_Agent(RPSTask, dqn_config_dict, 'Dueling_DQN')
    assert agent.training and agent.algorithm.use_dueling

    learn_against_fix_opponent(agent,
                               fixed_opponent=rockAgent,
                               agent_position=0,  # Doesn't matter in RPS
                               task=RPSTask,
                               total_episodes=250,
                               training_percentage=0.9,
                               reward_tolerance=2.,
                               maximum_average_reward=10.0,
                               evaluation_method='cumulative')
def test_learns_to_beat_rock_in_RPS(RPSTaskSingleRepetition, tabular_q_learning_config_dict):
    '''
    Checks that a tabular Q-learning agent (with repeated-update Q-learning
    enabled) "learns" by computing a best response against an opponent that
    always plays rock in Rock-Paper-Scissors: starting from random play,
    it should converge to (mostly) playing paper.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    # Enable the RUQL variant of tabular Q-learning for this test.
    tabular_q_learning_config_dict['use_repeated_update_q_learning'] = True
    agent = build_TabularQ_Agent(RPSTaskSingleRepetition,
                                 tabular_q_learning_config_dict,
                                 'TQL_RUQL')
    assert agent.training

    learn_against_fix_opponent(
        agent,
        fixed_opponent=rockAgent,
        agent_position=0,  # Doesn't matter in RPS
        task=RPSTaskSingleRepetition,
        total_episodes=500,
        training_percentage=0.9,
        reward_tolerance=0.,
        maximum_average_reward=1.0,
        evaluation_method='cumulative')
Example #3
0
def test_a2c_learns_to_beat_rock_in_RPS(RPSTask, a2c_config_dict):
    '''
    Checks that an A2C agent "learns" by computing a best response against
    an opponent that always plays rock in Rock-Paper-Scissors: starting
    from random play, it should converge to (mostly) playing paper.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    agent = build_A2C_Agent(RPSTask, a2c_config_dict, 'A2C')
    assert agent.training

    learn_against_fix_opponent(
        agent,
        fixed_opponent=rockAgent,
        agent_position=0,  # Doesn't matter in RPS
        task=RPSTask,
        training_episodes=100,
        benchmark_every_n_episodes=0,
        test_episodes=50,
        reward_tolerance=1.,
        maximum_average_reward=10.0,
        evaluation_method='cumulative')
Example #4
0
def play_against_fixed_agent(agent,
                             fixed_agent_action,
                             agent_position,
                             max_reward,
                             total_episodes=2000):
    '''
    Trains :param: agent in Kuhn Poker against an opponent that always
    plays :param: fixed_agent_action, checking the agent "learns" a best
    response against that fixed strategy.

    :param agent: Trainable agent under test (must have training enabled).
    :param fixed_agent_action: The single action the opponent always takes.
    :param agent_position: Seat of the trained agent in the Kuhn Poker task.
    :param max_reward: Expected average reward of a best response.
    :param total_episodes: Number of episodes to run.
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    # Minimal opponent that ignores observations and replays one action.
    class FixedAgent(Agent):
        def __init__(self, action):
            super().__init__(name=f'FixedAction: {action}')
            self.action = action

        def take_action(self, *args):
            # Always the same move, regardless of game state.
            return self.action

        def handle_experience(self, *args):
            # Fixed policy: nothing to learn from experience.
            pass

        def clone(self, *args):
            pass

    kuhn_task = generate_task('KuhnPoker-v0',
                              EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    assert agent.training

    learn_against_fix_opponent(agent,
                               fixed_opponent=FixedAgent(fixed_agent_action),
                               agent_position=agent_position,
                               task=kuhn_task,
                               total_episodes=total_episodes,
                               training_percentage=0.9,
                               reward_tolerance=1.,
                               maximum_average_reward=max_reward,
                               evaluation_method='last')