def __init__(self, games_per_move):
    """
    Constructs random search agent
    :param games_per_move: Games to play per possible move
    """
    SearchAgent.__init__(self)
    self._games_per_move = games_per_move
    self._random_agent = RandomAgent()
Example #2
def __init__(self, search_depth, games_per_move):
    """
    Constructs random search agent
    :param search_depth: Additional moves to play after each starting move
    :param games_per_move: Games to play per possible move
    """
    SearchAgent.__init__(self)
    self._search_depth = search_depth
    self._games_per_move = games_per_move
    self._random_agent = RandomAgent()
Example #3
class RandomSearchAgent(SearchAgent):
    def __init__(self, games_per_move):
        """
        Constructs random search agent
        :param games_per_move: Games to play per possible move
        """
        SearchAgent.__init__(self)
        self._games_per_move = games_per_move
        self._random_agent = RandomAgent()

    def select_move(self, game):
        """
        Selects next move to make for given game
        :param game: Current game [SquareStackerGame]
        :return move: Move [k, i, j] or None if no moves exist
        :return moves_searched: Number of moves searched before deciding
        """

        # Moves searched counter
        moves_searched = 0

        # Get valid moves
        valid_moves = game.get_valid_moves()
        num_valid_moves = len(valid_moves)

        if num_valid_moves > 0:

            # Array of mean scores per next move
            mean_scores = []

            # For each valid initial move
            for next_move in valid_moves:

                # Make initial move
                game_next = game.deepcopy()
                game_next.make_move(next_move)
                moves_searched += 1

                # Play N games with random agent
                mean_score = 0.0
                for g in range(self._games_per_move):
                    game_test = game_next.deepcopy()
                    while True:
                        test_move = self._random_agent.select_move(game_test)
                        if test_move is not None:
                            game_test.make_move(test_move)
                            moves_searched += 1
                        else:
                            break
                    mean_score += game_test.get_score()
                mean_score /= self._games_per_move

                # Record mean score for this starting move
                mean_scores.append(mean_score)

            # Select move with highest mean score
            move = valid_moves[np.argmax(mean_scores)]
            return move, moves_searched
        else:
            return None, moves_searched
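A minimal usage sketch for the class above, playing one full game; the import paths and the SquareStackerGame constructor are assumptions inferred from the docstrings, not shown in the example:

# Hypothetical import paths -- adjust to the actual project layout
from square_stacker_game import SquareStackerGame
from agents.random_search import RandomSearchAgent

# Sample 20 random rollouts per candidate move
agent = RandomSearchAgent(games_per_move=20)
game = SquareStackerGame()

total_searched = 0
while True:
    # select_move returns the chosen move (or None) and the number of moves searched
    move, moves_searched = agent.select_move(game)
    total_searched += moves_searched
    if move is None:
        break
    game.make_move(move)

print('Final score:', game.get_score())
print('Total moves searched:', total_searched)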
Example #4
from tictactoe import O, STEP_RESULT, TicTacToe, X
from agents.random import Agent as RandomAgent
from agents.temporaldifference import Agent as TDAgent

game_settings = {
    "win_length": 3,
    "size": 3,
}

a1 = RandomAgent(**game_settings)
a2 = TDAgent(**game_settings,
             include_end_game_bias=True,
             step_size_param=0.7,
             explore_ratio=0.5)
# a2 = RandomAgent(**game_settings)


def run_agents(agent1, agent2, episodes=100, verbose=False, train=True):
    agent1wins = 0
    agent2wins = 0
    draws = 0
    for episode in range(episodes):
        game = TicTacToe(**game_settings)
        res = STEP_RESULT.NONE

        agent1_team = X
        agent2_team = O
        if episode % 2 == 1:
            agent1_team = O
            agent2_team = X
"""
random_.py
Test script for Square Stacker Random Agent
"""

from agents.random import RandomAgent
from tests.agent import test_agent

# Test Settings
test_num_games = 5000

# Test Agent
agent = RandomAgent()
test_agent(agent, num_games=test_num_games)
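The same harness can presumably exercise the search agent from Example #3; a sketch assuming a hypothetical agents.random_search module and an arbitrary games_per_move value:

from agents.random_search import RandomSearchAgent  # hypothetical import path
from tests.agent import test_agent

# Far fewer games: each select_move call already plays games_per_move random rollouts per candidate move
agent = RandomSearchAgent(games_per_move=10)
test_agent(agent, num_games=100)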
Example #6
                           role=e,
                           recording_path=None)

    vqae = None
    if args.use_vqae:
        # Initialize VQAE
        vqae = Autoencoder(plot_class=plot_class)

    # Initialize processor
    processor = MalmoProcessor(autoencoder=vqae,
                               plot_class=plot_class,
                               action_space=action_space)

    if args.agent_type == 'Random':
        # Use Random agent to train the VQAE
        agent = RandomAgent(num_actions=env.available_actions,
                            processor=processor)
    elif args.agent_type == 'DDQN':
        # Setup exploration policy
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                      attr='eps',
                                      value_max=opt.eps_value_max,
                                      value_min=opt.eps_value_min,
                                      value_test=opt.eps_value_test,
                                      nb_steps=opt.eps_decay_steps)
        if opt.use_quantized_observations:
            agent = TabularQAgent(num_states=opt.state_vector_length,
                                  num_actions=env.available_actions,
                                  policy=policy,
                                  test_policy=policy,
                                  processor=processor)
        else:
Example #7
    env = RoomsEnvironment(action_space=action_space, mission_name=mission_name, mission_xml=mission_xml,
                           remotes=clients, state_builder=state_builder, role=e, recording_path=None)

    vqae = None
    if args.use_vqae:
      # Initialize VQAE
      vqae = Autoencoder(plot_class=plot_class)

    # Initialize processor
    processor = MalmoProcessor(autoencoder=vqae,
                               plot_class=plot_class,
                               action_space=action_space)

    if args.agent_type == 'Random':
      # Use Random agent to train the VQAE
      agent = RandomAgent(num_actions=env.available_actions, processor=processor)
    elif args.agent_type == 'DDQN':
      # Setup exploration policy
      policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                    attr='eps', value_max=opt.eps_value_max,
                                    value_min=opt.eps_value_min,
                                    value_test=opt.eps_value_test, nb_steps=opt.eps_decay_steps)
      if opt.use_quantized_observations:
        agent = TabularQAgent(num_states=opt.state_vector_length,
                              num_actions=env.available_actions,
                              policy=policy,
                              test_policy=policy,
                              processor=processor)
      else:
        # Setup DQN agent
        if opt.recurrent:
Example #8
    else:
        # Setup model (recurrent DRQN or feed-forward DQN)
        if opt.recurrent:
            model = DRQN_Model(window_length=opt.dqn_window_length,
                               num_actions=env.action_space.n)
        else:
            model = DQN_Model(window_length=opt.dqn_window_length,
                              num_actions=env.action_space.n)
        # Setup DQN agent
        agent = DQN(model=model,
                    num_actions=env.action_space.n,
                    policy=policy,
                    test_policy=policy,
                    processor=processor)
else:
    agent = RandomAgent(num_actions=env.action_space.n, processor=processor)

print(args.env_name + ' initialized.')

# Setup weights path
path = os.path.join('weights', 'Atari', '{}'.format(args.env_name))
if not os.path.exists(path):
    os.makedirs(path)
weights_path = os.path.join(path, 'weights.hdf5')

# Run the agent
agent.fit(env=env,
          num_steps=args.steps,
          weights_path=weights_path,
          visualize=args.visualize)
agent.save(weights_path)