def mm_find_best():
    """MiniMax find best move (max_layers=3)."""
    t = mm_tg()
    t.vline(3, 0, 15, t.body_of(1))
    mm = MiniMax(t, mm_player())
    mm.find_best_move(max_layers=3)
def test_catch(tg, player):
    """Test catching the other player."""
    tg.vline(3, 0, 15, tg.body_of(1))
    mm = MiniMax(tg, player)
    weight, move = mm.find_best_move(max_layers=3)
    assert move == 'LEFT'
def action_called(self):
    # get the button that called the action
    button = self.sender()
    # disable button
    button.setEnabled(False)
    # traverse the button grid to find the coordinates of the button
    # (there is probably a better way to do this)
    for i in range(len(self.push_list)):
        for j in range(len(self.push_list[i])):
            if button == self.push_list[i][j]:
                move = (i, j)
                break
    if self.is_ai:
        # set the text of the button to X
        button.setText(self.game.get_X_player())
        # make the move in the game
        self.game.make_move(self.game.get_X_player(), move)
        # if the game is unfinished, make the AI move
        if self.game.get_status() == "UNFINISHED":
            game_object = copy.deepcopy(self.game)
            ai = MiniMax(game_object)
            ai_move = ai.minimax(game_object)
            button = self.push_list[ai_move[0]][ai_move[1]]
            button.setText(self.game.get_O_player())
            button.setEnabled(False)
            self.game.make_move(self.game.get_O_player(), ai_move)
    else:
        turn = self.game.get_current_player()
        if turn == self.game.get_X_player():
            button.setText(self.game.get_X_player())
            self.game.make_move(self.game.get_X_player(), move)
        else:
            button.setText(self.game.get_O_player())
            self.game.make_move(self.game.get_O_player(), move)
    # determine if there is a win or draw and update the status label
    win = self.game.get_status()
    text = ""
    if win == "X_WON":
        text = "X WON"
    if win == "O_WON":
        text = "O WON"
    if win == "DRAW":
        text = "DRAW"
    self.label.setText(text)
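# The nested scan above answers the snippet's own "there is probably a
# better way" note. One alternative (a sketch, assuming a Qt toolkit such
# as PyQt5, which the snippet never names): attach the coordinates to each
# button with Qt's dynamic-property API when the grid is built, then read
# them back in the handler.
#
#     btn.setProperty("coords", (i, j))   # when creating the grid
#     move = button.property("coords")    # in action_called, no scan needed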
def go(self):
    """Act depending on whether we see others."""
    if self.can_see_others():
        mm = MiniMax(self.grid, self.players[self.my_number],
                     full_bfs=self.full_bfs)
        weight, move = mm.find_best_move(max_layers=self.max_layers,
                                         max_layer_size=self.max_layer_size)
        mm.unlink_states()
        if weight > 0:
            return move
    return self.go_wander()
def aiPlay(self):
    m = MiniMax(self.gameBoard)
    best_move = m.bestMove(5, self.gameBoard, self.currentTurn)
    # print("best move: ", best_move)
    self.playPiece(best_move)
    # print('\n\nmove %d: Player %d, column %d\n' % (self.pieceCount, self.currentTurn, randColumn+1))
    if self.currentTurn == 1:
        self.currentTurn = 2
    elif self.currentTurn == 2:
        self.currentTurn = 1
def test_obvious_win(self):
    # player 1 should go for the win here
    gameArr = [
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0],
        [1, 2, 0, 0, 0, 0, 0],
    ]
    m = MiniMax(gameArr)
    best_move = m.bestMove(5, gameArr, 1)
    self.assertEqual(best_move, 0)
def test_diagonal_win(self):
    # player 2 has a win on the diagonal
    gameArr = [
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 1, 2, 1, 0, 0, 0],
        [0, 1, 1, 2, 0, 0, 0],
        [0, 2, 1, 2, 2, 0, 0],
        [1, 2, 2, 2, 1, 0, 0],
    ]
    m = MiniMax(gameArr)
    best_move = m.bestMove(5, gameArr, 2)
    self.assertEqual(best_move, 1)
def test_game_end_full(self):
    # the board is full and the game is over, so there is no move to make
    gameArr = [
        [1, 2, 1, 2, 1, 2, 1],
        [2, 1, 2, 1, 2, 1, 2],
        [1, 1, 2, 2, 1, 1, 2],
        [2, 2, 1, 1, 1, 2, 2],
        [2, 1, 1, 2, 2, 1, 1],
        [1, 2, 2, 2, 2, 2, 2],
    ]
    m = MiniMax(gameArr)
    best_move = m.bestMove(5, gameArr, 2)
    self.assertEqual(best_move, None)
def test_game_end_win(self):
    # player 2 has already won on the diagonal, so there is no move to make
    gameArr = [
        [0, 0, 0, 0, 0, 0, 0],
        [0, 2, 0, 0, 0, 0, 0],
        [0, 1, 2, 1, 0, 0, 0],
        [0, 1, 1, 2, 0, 0, 0],
        [0, 2, 1, 2, 2, 0, 0],
        [1, 2, 2, 2, 1, 0, 0],
    ]
    m = MiniMax(gameArr)
    best_move = m.bestMove(5, gameArr, 2)
    self.assertEqual(best_move, None)
def test_block(self):
    # player 2 should block player 1 here
    gameArr = [
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 2, 0, 0, 0, 1, 0],
        [0, 2, 2, 1, 0, 1, 0],
    ]
    m = MiniMax(gameArr)
    best_move = m.bestMove(5, gameArr, 2)
    self.assertEqual(best_move, 5)
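# A small debugging helper, not part of the original tests: it renders the
# 6x7 integer boards used above so failing cases are easier to read. Purely
# illustrative; the symbol mapping is an assumption.
def print_board(game_arr):
    """Pretty-print a board where 0 is empty and 1 and 2 are the players."""
    symbols = {0: '.', 1: 'X', 2: 'O'}
    for row in game_arr:
        print(' '.join(symbols[cell] for cell in row))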
def test_init(tg, player):
    """Test initialization."""
    mm = MiniMax(tg, player)
    assert mm.my_number == 1
    assert mm.my_pos == tg.coords2index(2, 15)
    assert mm.opponents == {2: tg.coords2index(0, 5)}
def fit(self):
    random = RandomMove()
    minimax = MiniMax(max_depth=9)
    agents = np.array([random, self])
    state = np.zeros(n_size * n_size)
    for i in range(20001):
        np.random.shuffle(agents)
        extended_boards, extended_actions, rewards, unfinished_flags, _ = play(agents)
        for board_sequence, action_sequence in zip(extended_boards, extended_actions):
            for state, next_state, action, reward, unfinished in zip(
                    board_sequence[:-1], board_sequence[1:], action_sequence,
                    rewards, unfinished_flags):
                state_hash = self.hash(state)
                next_hash = self.hash(next_state)
                # standard Q-learning update:
                # Q[s][a] += alpha * (r + gamma * max_a' Q[s'][a'] - Q[s][a])
                self.q[state_hash][action] += self.alpha * (
                    reward + self.gamma * unfinished * np.amax(self.q[next_hash])
                    - self.q[state_hash][action])
        if i % 1000 == 0:
            print(f'iteration {i}\t\t\twin/draw/lose')
            print('minimax vs. q learning', test([minimax, self]))
            print('q learning vs. minimax', test([self, minimax]))
            print('random vs. q learning', test([random, self]))
            print('q learning vs. random', test([self, random]))
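# A tiny worked instance of the update rule in fit() above, with made-up
# numbers so the arithmetic is easy to follow: alpha=0.5, gamma=0.9,
# reward=0, unfinished=1, current Q[s][a]=0.2, max over Q[s']=0.6.
alpha, gamma = 0.5, 0.9
q_sa, max_q_next, reward, unfinished = 0.2, 0.6, 0.0, 1.0
q_sa += alpha * (reward + gamma * unfinished * max_q_next - q_sa)
print(q_sa)  # ~0.37: halfway from 0.2 toward the bootstrapped target 0.54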
def test_first_layer(tg, player):
    """Test computing the first layer in open field."""
    mm = MiniMax(tg, player)
    mm.compute_next_layer()
    assert len(mm.layers) == 1
    assert len(mm.layers[0]) == 4
    assert {state.moves[0].direction for state in mm.layers[0]} == \
        set(tg.DIRECTIONS.keys())
    for state in mm.layers[0]:
        assert len(state.moves) == 1
        move = state.moves[0]
        assert move.player_number == 1
        assert move.is_mine is True
        assert mm.my_pos + tg.DIRECTIONS[move.direction] == state.player2pos[1]
def get_agent(self, task_id, board, depth):
    if task_id == self.GBFS:
        return GBFS(board)
    elif task_id == self.MINIMAX:
        return MiniMax(board, depth)
    else:
        return AlphaBeta(board, depth)
def main():
    minimax = MiniMax(max_depth=9)
    mcts = MCTS()
    random = RandomMove()
    test([mcts, mcts])
    print('\t\t\t\twin/draw/lose')
    print('mcts vs. mcts', test([mcts, mcts]))
    print('random vs. mcts', test([random, mcts]))
    print('mcts vs. random', test([mcts, random]))
    print('minimax vs. mcts', test([minimax, mcts]))
    print('mcts vs. minimax', test([mcts, minimax]))
def test_second_layer(tg, player):
    """Test computing two layers in open field with opponent next to wall."""
    mm = MiniMax(tg, player)
    mm.compute_next_layer()
    mm.compute_next_layer()
    assert len(mm.layers) == 2
    assert len(mm.layers[0]) == 4
    assert len(mm.layers[1]) == 12
    for state in mm.layers[0]:
        assert state.next_player == 2
        assert len(state.next_states) == 3
        assert {nstate.moves[1].direction for nstate in state.next_states} == \
            set(tg.DIRECTIONS.keys()) - {'LEFT'}
        for nstate in state.next_states:
            assert nstate.player_number == 2
            assert nstate.prev_state == state
            assert len(nstate.moves) == 2
            move = nstate.moves[1]
            assert move.is_mine is False
            assert move.player_number == 2
            assert mm.opponents[2] + tg.DIRECTIONS[move.direction] == \
                nstate.player2pos[2]
class PrunePlayer(Computer):
    def __init__(self, shape, depth_lim=9):
        Computer.__init__(self, shape)
        evaluator = Evaluator()
        self.mini_max_obj = MiniMax(evaluator.eval, self.shape, self.other_shape())
        self.depth_lim = depth_lim
        self.name_str = 'Prune'

    def get_move(self):
        if self.current_board.count_empty() == 9:
            pos = [random.choice([0, 1, 2]), random.choice([0, 1, 2])]
        else:
            start_time = time.time()
            score, pos = self.mini_max_obj.minimax_alphabeta(
                self.current_board, self.depth_lim, self.shape, self.other_shape())
            end_time = time.time()
            print(f'Elapsed time (Pruned): {end_time - start_time}')
        return pos
class SmartestPlayer(Computer):
    def __init__(self, shape, depth_lim=9):
        Computer.__init__(self, shape)
        evaluator = Evaluator()
        self.mini_max_obj = MiniMax(evaluator.eval, self.shape, self.other_shape())
        self.depth_lim = depth_lim
        self.name_str = 'MiniMax'

    def get_move(self):
        if self.current_board.count_empty() == 9:
            pos = [random.choice([0, 1, 2]), random.choice([0, 1, 2])]
        else:
            start_time = time.time()
            move = self.mini_max_obj.minimax(self.current_board, self.depth_lim,
                                             self.shape, self.other_shape())
            end_time = time.time()
            print(f'Elapsed time (MiniMax): {end_time - start_time}')
            pos = [move[1], move[2]]  # minimax returns (score, row, col)
        return pos
def fit(self):
    random = RandomMove()
    minimax = MiniMax(max_depth=9)
    agents = [minimax, self]
    # fill the replay buffer before training begins
    while self.states.shape[0] < self.training_size:
        # np.random.shuffle(agents)
        play(agents, self)
    for iteration in range(self.n_episodes):
        self.eps *= self.eps_decay
        # np.random.shuffle(agents)
        play(agents, self)
        print('iteration:', iteration, 'eps:', self.eps)
        for i in range(10):
            self.replay()
        if iteration % 10 == 0:
            self.target_net.copy_weights(self.policy_net)
            # evaluate greedily: save eps, switch exploration off, restore after
            temp_eps = self.eps
            self.eps = 0
            print('\t\t\t\twin/draw/lose')
            print('minimax vs. dqn', test([minimax, self]))
            print('dqn vs. minimax', test([self, minimax]))
            print('random vs. dqn', test([random, self]))
            print('dqn vs. random', test([self, random]))
            self.eps = temp_eps
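# A quick feel for the eps schedule in fit() above. The starting value and
# decay rate are made up here (the real ones live on the class); with
# eps=1.0 and eps_decay=0.995, the exploration rate after n episodes is
# eps * eps_decay ** n.
eps, eps_decay = 1.0, 0.995
for n in (0, 100, 500, 1000):
    print(n, round(eps * eps_decay ** n, 4))  # 1.0, 0.6058, 0.0816, 0.0067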
from minimax import MiniMax
from alphabeta import AlphaBeta
from qlearning import QLearning
import time

game = TicTacToe()
user = int(input("Player1(X) or Player2(O):"))
ai = 2 if user == 1 else 1
ai_algorithm = input("""Choose your opponent:
1. MiniMax Algorithm
2. MiniMax with Alpha-Beta Pruning
3. Q-Learning Agent
""")
if ai_algorithm == "1":
    agent = MiniMax(player=ai)
elif ai_algorithm == "2":
    agent = AlphaBeta(player=ai)
elif ai_algorithm == "3":
    agent = QLearning(player=ai)
    agent.epsilon = 0
    agent.load_q_table()

while True:
    game.render()
    print("-----------------------------------------------------------------")
    if game.turn == user:
        action = int(input("Action (0-8):"))
        done = game.step(action)
        if done:
            game.render()
def minimax(self):
    agent = MiniMax(self.board, self.depth)
    board = agent.get_next_board()
    agent.output_next_state(board)
    agent.output_log()
def _evalState(self, s):
    m = MiniMax(depth=self._depth)
    # clamp the engine score to the range [-12, 12]
    if self._useDelta:
        return min(max(m.getBoardScoreDelta(s), -12.0), 12.0)
    else:
        return min(max(m.getBoardScore(s), -12.0), 12.0)
# %%
import numpy as np
import matplotlib.pyplot as plt

# %%
from tictactoe import Board, X, O
from minimax import MiniMax
from expectiminimax import ExpectiMiniMax
from plot import plot_board_score, clean_square

# %% [markdown] heading_collapsed=true
# ## Optimal adversary

# %% hidden=true
engine = MiniMax()
b = Board()
engine.search(b)

# %% hidden=true
engine[b]

# %% hidden=true
engine[Board((1, 0, 0, 0, 0, 0, 0, 0, 0))]

# %% hidden=true
a = (1, 0, 2, 0, 0)
b = (0, 0, 2, 0, 0)
tuple(a_ ^ b_ for a_, b_ in zip(a, b))
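# %% [markdown]
# A quick check of the XOR trick in the last cell above (the tuples here are
# made up for illustration): on integer-encoded boards, squares that did not
# change XOR to 0, so the single nonzero entry marks the move played between
# the two positions.

# %%
before = (1, 0, 2, 0, 0)
after = (1, 0, 2, 2, 0)
diff = tuple(x ^ y for x, y in zip(before, after))
diff, diff.index(2)  # ((0, 0, 0, 2, 0), 3): a 2 landed on square 3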