def test_custom_player(self):
    """CustomPlayer successfully completes a game against itself.

    Plays one CustomPlayer-vs-CustomPlayer match through ``play``, replays the
    returned move history from the initial state, and checks that the
    resulting state is terminal.  The final board and the winner are printed.

    Raises:
        Exception: if the replayed game does not end in a terminal state.
    """
    agents = (Agent(CustomPlayer, "Player 1"), Agent(CustomPlayer, "Player 2"))
    initial_state = Isolation()
    winner, game_history, _ = play((agents, initial_state, self.time_limit, 0))

    # Replay every recorded move to rebuild the final position.
    state = initial_state
    moves = deque(game_history)
    while moves:
        state = state.result(moves.popleft())

    if not state.terminal_test():
        # The placeholder used to sit in a plain string literal and was
        # printed verbatim; format the player id into the message instead.
        print("Your agent with id:{} was not able to make a move in state:".format(
            state.player()))
        print(state.player())
        debug_state = DebugState.from_state(state)
        print(debug_state)
        raise Exception("Your agent did not play until a terminal state.")

    debug_state = DebugState.from_state(state)
    print(debug_state)
    print("Winner is: " + str(winner) + "!")
def get_action(self, state):
    """Queue a move for ``state``, refining it with repeated playouts until time runs out.

    A random legal action is queued first so that an answer is always
    available.  Each iteration then plays a game from ``state`` via
    ``play_sync`` (``self.agent`` against ``MCTSMaximum``), passes the outcome
    to ``self.agent.backpropagate`` and queues the action whose successor
    state currently has the highest ``score`` in ``self.agent.data``.

    Args:
        state: current game state; must provide ``actions()`` and ``result()``.

    The loop stops once the elapsed time exceeds ``self.time_limit / 1024``.
    """
    start_time = time.perf_counter()
    actions = state.actions()
    self.queue.put(random.choice(actions))  # fallback answer
    states = [state.result(action) for action in actions]
    agents = (Agent(self.agent, '1'), Agent(MCTSMaximum, '2'))
    if self.verbose_depth:
        print('\n' + self.__class__.__name__.ljust(20) + ' | depth:', end=' ', flush=True)

    for index in range(sys.maxsize):
        try:
            winner, game_history, match_id = play_sync(agents, state, time_limit=0, logging=False)
            winner_idx = agents.index(winner)
            self.agent.backpropagate(winner_idx, game_history)

            # Keep each action paired with the score of ITS successor state.
            # Filtering the scores alone and zipping them against the full
            # action list would shift scores onto the wrong actions whenever
            # some successor has no entry yet.
            data = self.agent.data
            scored = [
                (action, data[child].score)
                for action, child in zip(actions, states)
                if child in data
            ]
            if scored:
                action, score = max(scored, key=itemgetter(1))
                self.queue.put(action)
        except Exception:
            # Best effort: a failed playout must not lose the answer already
            # queued.  KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

        if self.verbose_depth:
            print(index, end=' ', flush=True)
        if (time.perf_counter() - start_time) > (self.time_limit / 1024):
            break
def test_custom_player(self):
    """CustomPlayer successfully completes a game against itself.

    Runs a single self-play match and asserts that replaying the recorded
    moves from the starting position ends in a terminal state.
    """
    contenders = (Agent(CustomPlayer, "Player 1"), Agent(CustomPlayer, "Player 2"))
    start = Isolation()
    match = (contenders, start, self.time_limit, 0)
    _winner, game_history, _ = play(match)

    # Walk the move list forward from the starting position.
    final_state = start
    for move in game_history:
        final_state = final_state.result(move)

    self.assertTrue(final_state.terminal_test(),
                    "Your agent did not play until a terminal state.")
def main():
    """Play ``args.rounds * 2`` games of the custom agent against the chosen opponent.

    The opponent is looked up in ``TEST_AGENTS`` by ``args.opponent``
    (upper-cased).  Player order alternates between games.  Unless
    ``args.verbose`` is set, a ``+``/``-`` progress mark is printed per game.
    A summary line is printed and logged at the end.

    Raises:
        KeyError: if ``args.opponent`` is not a key of ``TEST_AGENTS``.
    """
    args = argparser()
    test_agent = TEST_AGENTS[args.opponent.upper()]
    custom_agent = Agent(CustomPlayer, "Custom Agent")
    players = (test_agent, custom_agent)
    results = {player: 0 for player in players}
    match_count = args.rounds * 2
    game_histories = []
    time_start = time.perf_counter()

    print("{} vs {} | Running {} games:".format(custom_agent.name, test_agent.name, match_count))
    for match_id in range(match_count):
        # reverse player order between matches
        player_order = (players[match_id % 2], players[(match_id + 1) % 2])
        # The echoed match id is discarded so the loop variable is not rebound.
        winner, game_history, _ = play_sync(player_order, match_id=match_id, **vars(args))
        results[winner] += 1
        game_histories.append(game_history)
        if not args.verbose:
            print('+' if winner == custom_agent else '-', end='', flush=True)
    time_taken = time.perf_counter() - time_start

    # With zero games there is nothing to average; report 0 instead of
    # raising ZeroDivisionError.
    if match_count:
        percentage = 100 * (results[custom_agent] / match_count)
        time_per_match = time_taken / match_count
    else:
        percentage = 0.0
        time_per_match = 0.0

    message = "{} won {}/{} ({:.1f}%) of matches against {} in {:.0f}s ({:.2f}s/round)".format(
        custom_agent.name, results[custom_agent], match_count, percentage,
        test_agent.name, time_taken, time_per_match)
    print()
    print(message)
    _logger.info(message)
    print()
def main(args):
    """Run the custom agent against the selected opponent and print match statistics.

    The opponent is looked up in ``TEST_AGENTS`` by ``args.opponent``
    (upper-cased).  ``play_matches`` supplies the win count, game count and
    the per-game series (nodes expanded, execution times, depths, plies) that
    are summarised on stdout.  The win rate is also logged.

    Raises:
        KeyError: if ``args.opponent`` is not a key of ``TEST_AGENTS``.
    """
    # Imported locally so every statistics.* call below resolves regardless
    # of the file-level imports (only `mean` used to be bound here).
    import statistics

    test_agent = TEST_AGENTS[args.opponent.upper()]
    custom_agent = Agent(CustomPlayer, "Custom Agent")
    wins, num_games, nodes_expanded, algo_exec_time, depths, plies = play_matches(
        custom_agent, test_agent, args)

    win_summary = "Your agent won {:.1f}% of matches against {}".format(
        100. * wins / num_games, test_agent.name)
    logger.info(win_summary)
    print(win_summary)
    print("Your agent took on average {} plies to complete, min: {}, max: {}".format(
        statistics.mean(plies), min(plies), max(plies)))
    print("Your agent expanded on average {} nodes, min: {}, max: {}".format(
        int(statistics.mean(nodes_expanded)), min(nodes_expanded), max(nodes_expanded)))
    print("Your agent took on average {} seconds to finish a game".format(
        round(statistics.mean(algo_exec_time), 2)))

    # `depths` is flattened into a single sequence before summarising.
    new_combined_depths = flatten(depths)  # flatten for MCTS
    print(
        "Your agent reached the following depths: min: {}, mean: {}, median: {}, mode: {}, max: {}"
        .format(min(new_combined_depths),
                round(statistics.mean(new_combined_depths), 2),
                statistics.median(new_combined_depths),
                statistics.mode(new_combined_depths),
                max(new_combined_depths)))
def run_backpropagation(args):
    """Play matches between two registered agents, backpropagating every result.

    ``args`` is a dict.  ``args['agent']`` and ``args['opponent']`` select
    entries of ``TEST_AGENTS`` (case-insensitive).  Optional keys read here:
    ``reset`` (call ``reset()`` on agent classes that define it), ``rounds``
    and ``timeout`` (stop conditions; a falsy value disables the condition).
    The whole dict is also forwarded to ``play_sync`` and ``log_results``.
    """
    agent_key = args['agent'].upper()
    assert agent_key in TEST_AGENTS, '{} not in {}'.format(
        args['agent'], TEST_AGENTS.keys())
    opponent_key = args['opponent'].upper()
    assert opponent_key in TEST_AGENTS, '{} not in {}'.format(
        args['opponent'], TEST_AGENTS.keys())

    first = TEST_AGENTS.get(agent_key)
    second = TEST_AGENTS.get(opponent_key)
    if first.name == second.name:
        # Same agent on both sides: build two distinct Agent entries so they
        # can be told apart.
        first = Agent(first.agent_class, first.name)
        second = Agent(second.agent_class, second.name + ' 2')
    agents = (first, second)

    # Reset caches
    if args.get('reset'):
        for contender in agents:
            if callable(getattr(contender.agent_class, 'reset', None)):
                contender.agent_class.reset()

    scores = {contender: [] for contender in agents}
    start_time = time.perf_counter()
    match_id = 0
    while True:
        round_limit_hit = args.get('rounds', 0) and args['rounds'] <= match_id
        if round_limit_hit:
            break
        time_limit_hit = (args.get('timeout', 0)
                          and args['timeout'] <= time.perf_counter() - start_time)
        if time_limit_hit:
            break

        match_id += 1
        # reverse player order between matches
        agent_order = (agents[match_id % 2], agents[(match_id + 1) % 2])
        winner, game_history, match_id = play_sync(agent_order, match_id=match_id, **args)

        winner_idx = agent_order.index(winner)
        loser = agent_order[int(not winner_idx)]
        scores[winner].append(1)
        scores[loser].append(0)

        for contender in agent_order:
            if callable(getattr(contender.agent_class, 'backpropagate', None)):
                contender.agent_class.backpropagate(winner_idx=winner_idx,
                                                    game_history=game_history)

        log_results(agents, scores, match_id, winner, start_time, args)
def main(args):
    """Play the custom agent against the chosen opponent and report its win rate.

    The opponent comes from ``TEST_AGENTS`` keyed by ``args.opponent``
    (upper-cased).  The same summary line is logged and printed.
    """
    opponent = TEST_AGENTS[args.opponent.upper()]
    challenger = Agent(CustomPlayer, "Custom Agent")
    wins, num_games = play_matches(challenger, opponent, args)

    win_rate = 100. * wins / num_games
    summary = "Your agent won {:.1f}% of matches against {}".format(win_rate, opponent.name)
    logger.info(summary)
    print(summary)
    print()
def main(args):
    """Play the custom agent against the chosen opponent; report win rate and search depths.

    The opponent comes from ``TEST_AGENTS`` keyed by ``args.opponent``
    (upper-cased).  ``play_matches`` receives ``args.rounds``,
    ``args.processes`` and ``args.fair_matches`` and returns the win count,
    the game count and the two depth figures printed below.
    """
    opponent = TEST_AGENTS[args.opponent.upper()]
    challenger = Agent(CustomPlayer, "Custom Agent")
    outcome = play_matches(challenger, opponent, args.rounds, args.processes,
                           args.fair_matches)
    wins, num_games, my_depth, oppo_depth = outcome

    win_rate = 100. * wins / num_games
    summary = "Your agent won {:.1f}% of matches against {}".format(win_rate, opponent.name)
    logger.info(summary)
    print(summary)

    depth_report = (
        "Your agent mean search depth is {:.2f}, your opponent mean search depth is {:.2f}"
        .format(my_depth, oppo_depth))
    print(depth_report)
    print()
def main(args):
    """Evaluate each listed custom player class against the chosen opponent.

    For every class in the combo list, an ``Agent`` named "Custom Agent" is
    built, matches are played through ``play_matches``, and the win rate is
    logged and printed.
    """
    # Only CustomPlayer_10 is evaluated; the variants CustomPlayer_0 ..
    # CustomPlayer_9 used to be listed here in a commented-out alternative.
    custom_agent_combos = [CustomPlayer_10]

    for player_cls in custom_agent_combos:
        opponent = TEST_AGENTS[args.opponent.upper()]
        challenger = Agent(player_cls, "Custom Agent")
        print()
        print('Custom Agent: ', player_cls.__name__)

        wins, num_games = play_matches(challenger, opponent, args)
        win_rate = 100. * wins / num_games
        summary = "Your agent won {:.1f}% of matches against {}".format(
            win_rate, opponent.name)
        logger.info(summary)
        print(summary)
        print()
from isolation import Isolation, Agent, DebugState
from my_custom_player import CustomPlayer
import train

# main code
if __name__ == '__main__':
    # Build an empty debug board and convert it through from_state for printing.
    board = DebugState()
    debug_board = board.from_state(board)

    # NOTE(review): TEST_AGENTS, play_matches and args are neither defined nor
    # imported in the visible code; confirm where they are meant to come from.
    test_agent = TEST_AGENTS['MINIMAX']
    custom_agent = Agent(CustomPlayer, "Custom Agent")
    wins, num_games = play_matches(custom_agent, test_agent, args)
    print(debug_board)
from collections import namedtuple
from multiprocessing.pool import ThreadPool as Pool

from isolation import Isolation, Agent, play
from sample_players import RandomPlayer, GreedyPlayer, MinimaxPlayer
from my_custom_player import CustomPlayer

logger = logging.getLogger(__name__)

NUM_PROCS = 4  # default worker count passed to Pool by _run_matches
NUM_ROUNDS = 50  # number of times to replicate the match; increase for higher confidence estimate
TIME_LIMIT = 150  # number of milliseconds before timeout

# Opponent agents, keyed by upper-case name.
TEST_AGENTS = {
    "RANDOM": Agent(RandomPlayer, "Random Agent"),
    "GREEDY": Agent(GreedyPlayer, "Greedy Agent"),
    "MINIMAX": Agent(MinimaxPlayer, "Minimax Agent"),
    "SELF": Agent(CustomPlayer, "Custom TestAgent")
}

# Record describing one game; these are the items mapped over `play` below.
Match = namedtuple("Match", "players initial_state time_limit match_id debug_flag")


def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    """Play every entry of `matches` through a thread pool, printing progress.

    Args:
        matches: sized collection of items passed one by one to `play`.
        name: agent name; a game whose first result element has this `.name`
            is marked "+", any other game "-".
        num_processes: pool size used when `debug` is false.
        debug: when true, a single-worker pool is used instead.

    NOTE(review): the visible portion collects into `results` but ends without
    a return statement; the function may continue beyond this excerpt.
    """
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    print("Running {} games:".format(len(matches)))
    # imap_unordered yields results as games finish, not in submission order.
    for result in pool.imap_unordered(play, matches):
        print("+" if result[0].name == name else '-', end="")
        results.append(result)
from sample_players import RandomPlayer, GreedyPlayer, MinimaxPlayer #from my_custom_player import CustomPlayer from my_custom_player import BaselinePlayer from my_custom_player import HeuristicPlayer1 from my_custom_player import HeuristicPlayer2 from my_custom_player import HeuristicPlayer3 from my_custom_player import HeuristicPlayer4 logger = logging.getLogger(__name__) NUM_PROCS = 1 NUM_ROUNDS = 5 # number times to replicate the match; increase for higher confidence estimate TIME_LIMIT = 150 # number of milliseconds before timeout TEST_AGENTS_1 = { "RANDOM": Agent(RandomPlayer, "Random Player 1"), "GREEDY": Agent(GreedyPlayer, "Greedy Player 1"), "MINIMAX": Agent(MinimaxPlayer, "Minimax Player 1"), #"SELF": Agent(CustomPlayer, "Custom TestAgent") "BASELINE": Agent(BaselinePlayer, "Baseline Player 1"), "H1": Agent(HeuristicPlayer1, "Heuristic #1 Player 1"), "H2": Agent(HeuristicPlayer2, "Heuristic #2 Player 1"), "H3": Agent(HeuristicPlayer3, "Heuristic #3 Player 1"), "H4": Agent(HeuristicPlayer4, "Heuristic #4 Player 1") } TEST_AGENTS_2 = { "RANDOM": Agent(RandomPlayer, "Random Player 2"), "GREEDY": Agent(GreedyPlayer, "Greedy Player 2"), "MINIMAX": Agent(MinimaxPlayer, "Minimax Player 2"), #"SELF": Agent(CustomPlayer, "Custom TestAgent")
def main(args):
    """Select the opponent, create the custom agent and build a table.

    Args:
        args: object with `opponent` (a key of TEST_AGENTS, case-insensitive)
            and `rounds` (converted with int()).
    """
    test_agent = TEST_AGENTS[args.opponent.upper()]  # KeyError for an unknown opponent
    custom_agent = Agent(CustomPlayer, "Custom Agent")
    # NOTE(review): neither agent is used in the visible lines; the function
    # may continue beyond this excerpt.
    table = build_table(num_rounds=int(args.rounds))
def play_sync(
        agents: Tuple[Agent, Agent],
        game_state=None,  # defaults to Isolation()
        time_limit=TIME_LIMIT,
        match_id=0,
        debug=False,  # disables the signal timeout
        logging=True,
        verbose=False,  # prints an ASCII copy of the board after each turn
        exceptions=False,
        max_moves=0,  # end the game early after a set number of turns
        callbacks: List[Callable] = None,
        **kwargs):
    """Play one game synchronously between two agents.

    Args:
        agents: the two contenders.  Entries that are not ``Agent`` instances
            are wrapped in one.
        game_state: starting position; ``Isolation()`` when falsy.
        time_limit: per-move limit passed to ``call_with_timeout_ms``;
            ``0`` calls ``get_action`` directly without a timeout.
        match_id: identifier returned unchanged and passed to callbacks.
        debug: when true, ``get_action`` is called directly (no timeout).
        logging: log the game info and result through ``logger``.
        verbose: prepend ``verbose_callback`` to the per-turn callbacks.
        exceptions: report errors, invalid moves and timeouts instead of
            only recording them in the status.
        max_moves: stop once ``game_state.ply_count`` reaches this value
            (``0`` disables the check).
        callbacks: callable(s) invoked after every turn with keyword
            arguments ``game_state``, ``action``, ``active_player``,
            ``active_idx``, ``match_id`` and ``time_taken``.
        **kwargs: accepted and ignored.

    Returns:
        ``(winner, game_history, match_id)``.

    Raises:
        KeyboardInterrupt: re-raised if received during a move.
    """
    gc.collect(1)  # reduce chance of TimeoutError in call_with_timeout_ms() | gc.collect(2) is an expensive function

    # Wrap bare player classes.  The name is taken from the class itself:
    # `agent.__class__.name` is an AttributeError for an ordinary class.
    agents = tuple(
        agent if isinstance(agent, Agent)
        else Agent(agent, getattr(agent, '__name__', agent.__class__.__name__))
        for agent in agents)
    players = tuple(a.agent_class(player_id=i) for i, a in enumerate(agents))
    game_state = game_state or Isolation()
    initial_state = game_state
    active_idx = 0
    winner = None
    loser = None
    status = Status.NORMAL
    game_history = []
    callbacks = copy(callbacks) or []

    if logging:
        logger.info(GAME_INFO.format(initial_state, *agents))

    while not game_state.terminal_test():
        if max_moves and game_state.ply_count >= max_moves:
            break

        turn_start = time.perf_counter()
        active_idx = game_state.player()
        active_player = players[active_idx]
        # any problems during get_action means the active player loses
        winner, loser = agents[1 - active_idx], agents[active_idx]

        action = None
        active_player.queue = LifoQueue()  # we don't need a TimeoutQueue here
        try:
            if time_limit == 0 or debug:
                active_player.get_action(game_state)
                action = active_player.queue.get(block=False)  # raises Empty if agent did not respond
            else:
                # increment timeout 2x before throwing exception - MinimaxAgent occasionally takes longer than 150ms
                for i in [1, 2]:
                    exception = call_with_timeout_ms(i * time_limit, active_player.get_action, game_state)
                    if not active_player.queue.empty():
                        action = active_player.queue.get(block=False)  # raises Empty if agent did not respond
                        break  # accept answer generated after minimum timeout
                if exceptions and action is None and exception == TimeoutError:
                    print(active_player)
                    raise TimeoutError
        except KeyboardInterrupt:
            raise  # bare raise keeps the original traceback
        except Exception as err:
            status = Status.EXCEPTION
            if exceptions:
                logger.error(
                    ERR_INFO.format(err, initial_state, agents[0], agents[1], game_state, game_history))
                traceback.print_exception(type(err), err, err.__traceback__)
            break
        finally:
            if time_limit and not debug:
                signal.signal(signal.SIGPROF, signal.SIG_IGN)  # Unregister the timeout signal

        if action not in game_state.actions():
            status = Status.INVALID_MOVE
            if exceptions:
                print(
                    ERR_INFO.format('INVALID_MOVE', initial_state, agents[0], agents[1], game_state, game_history))
                logger.error(
                    ERR_INFO.format('INVALID_MOVE', initial_state, agents[0], agents[1], game_state, game_history))
            break

        time_taken = time.perf_counter() - turn_start
        game_state = game_state.result(action)
        game_history.append(action)

        # Callbacks can be used to hook in additional functionality after each turn, such as verbose rendering
        # BUGFIX: don't modify callbacks, else the board position will be repeated multiple times per turn
        turn_callbacks = list(callbacks) if isinstance(callbacks, (tuple, list, set)) else [callbacks]
        if verbose:
            # Prepend to the normalised list: `[...] + callbacks` is a TypeError
            # when callbacks is a tuple, a set or a single callable.
            turn_callbacks = [verbose_callback] + turn_callbacks
        for callback in turn_callbacks:
            if not callable(callback):
                continue
            callback(game_state=game_state,
                     action=action,
                     active_player=active_player,
                     active_idx=active_idx,
                     match_id=match_id,
                     time_taken=time_taken)
    else:
        # Reached only when the loop ended without `break`, i.e. the game was
        # played to a terminal state.
        status = Status.GAME_OVER
        if game_state.utility(active_idx) > 0:
            winner, loser = loser, winner  # swap winner/loser if active player won

    if logging:
        logger.info(
            RESULT_INFO.format(status, game_state, game_history, winner, loser))
    return winner, game_history, match_id
# NOTE(review): the statements down to log_results(...) are the tail of a
# definition that starts outside this excerpt; their original nesting cannot
# be recovered from here and they are shown flat.
winner_idx = agent_order.index(winner)
loser = agent_order[int(not winner_idx)]  # with two agents, the loser is the other index
# Record a 1 for the winner and a 0 for the loser of this match.
scores[winner] += [1]
scores[loser] += [0]
# Pass the result to every agent class that defines a callable `backpropagate`.
for agent_idx, agent in enumerate(agent_order):
    if callable(getattr(agent.agent_class, 'backpropagate', None)):
        agent.agent_class.backpropagate(winner_idx=winner_idx, game_history=game_history)
log_results(agents, scores, match_id, winner, start_time, args)

# Registry of selectable agents, keyed by upper-case short name.
TEST_AGENTS = {
    "RANDOM": Agent(RandomPlayer, "Random"),
    "GREEDY": Agent(GreedyPlayer, "Greedy"),
    "DISTANCE": Agent(DistancePlayer, "Distance"),
    "GD": Agent(GreedyDistancePlayer, "Greedy Distance"),
    "MINIMAX": Agent(MinimaxPlayer, "Minimax"),
    "ALPHABETA": Agent(AlphaBetaPlayer, "AlphaBeta"),
    "AREA": Agent(AlphaBetaAreaPlayer, "AlphaBeta Area"),
    "MCM": Agent(MCTSMaximum, "MCTS Maximum"),
    "MCR": Agent(MCTSRandom, "MCTS Random"),
    "MCMH": Agent(MCTSMaximumHeuristic, "MCTS Maximum Heuristic"),
    "MCRH": Agent(MCTSRandomHeuristic, "MCTS Random Heuristic"),
    "UCT": Agent(UCTPlayer, "UCT"),
    "SELF": Agent(CustomPlayer, "Custom TestAgent"),
}
import logging
from run_match import play_matches, TEST_AGENTS, NUM_PROCS, NUM_ROUNDS, TIME_LIMIT
from argparse import Namespace
from isolation import Agent
from my_standard_player import AlphaBetaPlayer

logger = logging.getLogger(__name__)

# Fixed match settings, built in one step instead of attribute by attribute.
args = Namespace(
    debug=False,
    fair_matches=True,
    processes=NUM_PROCS,
    rounds=NUM_ROUNDS,
    time_limit=TIME_LIMIT,
)

if __name__ == "__main__":
    # Pit the registered "SELF" agent against an alpha-beta opponent.
    opponent = Agent(AlphaBetaPlayer, "Alpha Beta Agent")
    challenger = TEST_AGENTS["SELF"]
    wins, num_games = play_matches(challenger, opponent, args)

    win_rate = 100. * wins / num_games
    summary = "Your agent won {:.1f}% of matches against {}".format(win_rate, opponent.name)
    logger.info(summary)
    print(summary)
    print()
from collections import namedtuple
from multiprocessing.pool import ThreadPool as Pool

from isolation import Isolation, Agent, play
from sample_players import RandomPlayer, GreedyPlayer, MinimaxPlayer, AlphaBetaPlayer
from my_custom_player import CustomPlayer

logger = logging.getLogger(__name__)

NUM_PROCS = 1  # default worker count passed to Pool by _run_matches
NUM_ROUNDS = 5  # number of times to replicate the match; increase for higher confidence estimate
TIME_LIMIT = 150  # number of milliseconds before timeout

# Opponent agents, keyed by upper-case name.
TEST_AGENTS = {
    "RANDOM": Agent(RandomPlayer, "Random Agent"),
    "GREEDY": Agent(GreedyPlayer, "Greedy Agent"),
    "MINIMAX": Agent(MinimaxPlayer, "Minimax Agent"),
    "ALPHABETA": Agent(AlphaBetaPlayer, "AlphaBeta Agent"),
    "SELF": Agent(CustomPlayer, "Custom TestAgent")
}

# Record describing one game; these are the items mapped over `play` below.
Match = namedtuple("Match", "players initial_state time_limit match_id debug_flag")


def _run_matches(matches, name, num_processes=NUM_PROCS, debug=False):
    # Plays each entry of `matches` via `play` on a thread pool (a single
    # worker when `debug` is true).  The loop body lies beyond this excerpt.
    results = []
    pool = Pool(1) if debug else Pool(num_processes)
    print("Running {} games:".format(len(matches)))
    for result in pool.imap_unordered(play, matches):