def __init__(self, search_depth, games_per_move):
    """
    Constructs random search agent
    :param search_depth: Additional moves to play after each starting move
    :param games_per_move: Games to play per possible move
    """
    SearchAgent.__init__(self)
    self._search_depth = search_depth
    self._games_per_move = games_per_move
    self._random_agent = RandomAgent()
import numpy as np


class RandomSearchAgent(SearchAgent):

    def __init__(self, games_per_move):
        """
        Constructs random search agent
        :param games_per_move: Games to play per possible move
        """
        SearchAgent.__init__(self)
        self._games_per_move = games_per_move
        self._random_agent = RandomAgent()

    def select_move(self, game):
        """
        Selects next move to make for given game
        :param game: Current game [SquareStackerGame]
        :return move: Move [k, i, j] or None if no moves exist
        :return moves_searched: Number of moves searched before deciding
        """

        # Moves searched counter
        moves_searched = 0

        # Get valid moves
        valid_moves = game.get_valid_moves()
        num_valid_moves = len(valid_moves)

        if num_valid_moves > 0:

            # Array of mean scores per next move
            mean_scores = []

            # For each valid initial move
            for next_move in valid_moves:

                # Make initial move
                game_next = game.deepcopy()
                game_next.make_move(next_move)
                moves_searched += 1

                # Play N games to completion with the random agent
                mean_score = 0.0
                for g in range(self._games_per_move):
                    game_test = game_next.deepcopy()
                    while True:
                        test_move = self._random_agent.select_move(game_test)
                        if test_move is not None:
                            game_test.make_move(test_move)
                            moves_searched += 1
                        else:
                            break
                    mean_score += game_test.get_score()
                mean_score /= self._games_per_move

                # Record mean score for this starting move
                mean_scores.append(mean_score)

            # Select move with highest mean score
            move = valid_moves[np.argmax(mean_scores)]
            return move, moves_searched
        else:
            return None, moves_searched
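For reference, a minimal usage sketch of the class above. The SquareStackerGame import path is an assumption; only the get_valid_moves/make_move/deepcopy/get_score interface is taken from the code itself.

# Usage sketch (assumed import path; game interface inferred from select_move above)
from square_stacker_game import SquareStackerGame  # hypothetical module

game = SquareStackerGame()
agent = RandomSearchAgent(games_per_move=50)

move, moves_searched = agent.select_move(game)
if move is not None:
    game.make_move(move)
    print('Played', move, 'after searching', moves_searched, 'moves')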
from tictactoe import O, STEP_RESULT, TicTacToe, X
from agents.random import Agent as RandomAgent
from agents.temporaldifference import Agent as TDAgent

game_settings = {
    "win_length": 3,
    "size": 3,
}

a1 = RandomAgent(**game_settings)
a2 = TDAgent(**game_settings, include_end_game_bias=True, step_size_param=0.7, explore_ratio=0.5)
# a2 = RandomAgent(**game_settings)


def run_agents(agent1, agent2, episodes=100, verbose=False, train=True):
    agent1wins = 0
    agent2wins = 0
    draws = 0

    for episode in range(episodes):
        game = TicTacToe(**game_settings)
        res = STEP_RESULT.NONE

        agent1_team = X
        agent2_team = O
        if episode % 2 == 1:
            # Swap teams on odd episodes so each agent plays both sides
            agent1_team = O
            agent2_team = X
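A possible way to drive the function above: a training run against the random agent followed by an evaluation pass. The episode counts are arbitrary, and how run_agents reports its win/draw tallies is not shown here, so this is only a sketch.

# Usage sketch (assumption: run_agents reports the win/draw tallies it accumulates)
if __name__ == "__main__":
    run_agents(a1, a2, episodes=10000, train=True)                  # let the TD agent learn
    run_agents(a1, a2, episodes=1000, verbose=True, train=False)    # evaluate without further updates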
""" random_.py Test script for Square Stacker Random Agent """ from agents.random import RandomAgent from tests.agent import test_agent # Test Settings test_num_games = 5000 # Test Agent agent = RandomAgent() test_agent(agent, num_games=test_num_games)
env = RoomsEnvironment(action_space=action_space,
                       mission_name=mission_name,
                       mission_xml=mission_xml,
                       remotes=clients,
                       state_builder=state_builder,
                       role=e,
                       recording_path=None)

vqae = None
if args.use_vqae:
    # Initialize VQAE
    vqae = Autoencoder(plot_class=plot_class)

# Initialize processor
processor = MalmoProcessor(autoencoder=vqae,
                           plot_class=plot_class,
                           action_space=action_space)

if args.agent_type == 'Random':
    # Use Random agent to train the VQAE
    agent = RandomAgent(num_actions=env.available_actions,
                        processor=processor)
elif args.agent_type == 'DDQN':
    # Setup exploration policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=opt.eps_value_max,
                                  value_min=opt.eps_value_min,
                                  value_test=opt.eps_value_test,
                                  nb_steps=opt.eps_decay_steps)

    if opt.use_quantized_observations:
        agent = TabularQAgent(num_states=opt.state_vector_length,
                              num_actions=env.available_actions,
                              policy=policy,
                              test_policy=policy,
                              processor=processor)
    else:
        # Setup DQN agent
        if opt.recurrent:
    else:
        # Setup DQN agent
        if opt.recurrent:
            model = DRQN_Model(window_length=opt.dqn_window_length,
                               num_actions=env.action_space.n)
        else:
            model = DQN_Model(window_length=opt.dqn_window_length,
                              num_actions=env.action_space.n)

        # Setup DQN agent
        agent = DQN(model=model,
                    num_actions=env.action_space.n,
                    policy=policy,
                    test_policy=policy,
                    processor=processor)
else:
    agent = RandomAgent(num_actions=env.action_space.n,
                        processor=processor)

print(args.env_name + ' initialized.')

# Setup weights path
path = os.path.join('weights', 'Atari', '{}'.format(args.env_name))
if not os.path.exists(path):
    os.makedirs(path)
weights_path = os.path.join(path, 'weights.hdf5')

# Run the agent
agent.fit(env=env,
          num_steps=args.steps,
          weights_path=weights_path,
          visualize=args.visualize)
agent.save(weights_path)
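The RandomAgent used in both setups is not shown. Below is a minimal sketch of an agent matching that constructor and the fit/save calls, assuming a Gym-style env and a keras-rl-style processor exposing process_observation; the point of the random policy here is simply to feed observations through the processor (and hence the VQAE). All method bodies are assumptions, not the repository's implementation.

import random

class RandomAgent:
    """Hypothetical sketch: uniform-random policy, used mainly to drive the processor/VQAE."""

    def __init__(self, num_actions, processor):
        self.num_actions = num_actions
        self.processor = processor

    def fit(self, env, num_steps, weights_path=None, visualize=False):
        # Assumes a Gym-style env and a processor with process_observation()
        observation = self.processor.process_observation(env.reset())
        for _ in range(num_steps):
            action = random.randrange(self.num_actions)  # ignore the observation entirely
            observation, reward, done, info = env.step(action)
            observation = self.processor.process_observation(observation)
            if done:
                observation = self.processor.process_observation(env.reset())

    def save(self, weights_path):
        # Nothing to persist for a random policy
        pass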