def minimax(self, board, player, mode='regular'):
    """Recursively score every reachable position and return the best move.

    Args:
        board: current Board. Mutated in place during the search but
            restored before returning (perform_move / unperform_move pairs).
        player: the mark (T.X or T.O) to move at this node.
        mode: 'regular' treats T.X as P1 (the side whose win scores -10);
            any other mode swaps P1/P2.

    Returns:
        dict with key 'score' (always) and key 'pos' (whenever at least one
        empty position exists): the chosen Position and its minimax value.
    """
    # P1's win is worth -10, P2's win +10, a draw 0.
    P1 = T.X if mode == 'regular' else T.O
    P2 = T.opponent_of(P1)

    # Terminal positions end the recursion.
    if self.check_status(board, P1):
        return {'score': -10}
    elif self.check_status(board, P2):
        return {'score': +10}
    elif self.check_status(board, T.D):
        return {'score': 0}

    available_positions = self.get_empty_positions(board)
    moves = []
    for pos in available_positions:
        move = {'pos': pos}
        board.perform_move(player, pos)
        # BUG FIX: propagate `mode` into the recursion. Previously the
        # recursive call omitted it, silently falling back to 'regular'
        # and mis-scoring every non-regular game below depth 0.
        minimax_move = self.minimax(board, T.opponent_of(player), mode=mode)
        move['score'] = minimax_move['score']
        board.unperform_move(T.E, pos)  # undo the trial move
        moves.append(move)

    # T.O maximizes and T.X minimizes, matching the +/-10 terminal scores.
    if player == T.O:
        best_move = max(moves, key=lambda m: m['score'])
    else:
        best_move = min(moves, key=lambda m: m['score'])
    return best_move
def player_vs_AI(n=3):
    """Interactive game: the human plays T.X on even turns, MCTS plays the rest.

    Reads moves from stdin as two space-separated indices ("i j") and prints
    the board after every move. Ends with a win, loss, or draw message.
    """
    board = Board()
    player = T.X
    mcts = MonteCarloTreeSearch()
    board.print_board()
    for move in range(9):
        if move % 2 == 0:
            # Human turn. NOTE(review): input is not validated — a malformed
            # or occupied position will raise; confirm desired behavior.
            input_position = input('Enter move in form [i j]:')
            i, j = [int(k) for k in input_position.split(' ')]
            pos = Position(i, j)
            board.perform_move(player, pos)
        else:
            # AI turn: MCTS returns a fresh board with its move applied.
            tree, board = mcts.find_next_move(board, player)
        status = board.check_status()
        if status == T.X:
            print('You win!')
            board.print_board()
            break
        elif status == T.O:
            print('You lose!')
            board.print_board()
            break
        elif status == T.D:
            # BUG FIX: the original loop ended silently on a full board;
            # announce the draw like the win/loss cases.
            print('Draw!')
            board.print_board()
            break
        board.print_board()
        player = T.opponent_of(player)
def backpropogate(self, exploration_node, player):
    """Walk from `exploration_node` up to (but excluding) the root,
    updating statistics from a finished playout.

    Every node on the path gets one extra visit; nodes whose state belongs
    to `player` gain WIN_SCORE, nodes belonging to the opponent lose it.
    """
    opponent = T.opponent_of(player)
    node = exploration_node
    while not node.is_root():
        state = node.state
        state.increment_visit()
        if state.player == player:
            state.add_score(MonteCarloTreeSearch.WIN_SCORE)
        elif state.player == opponent:
            state.add_score(-MonteCarloTreeSearch.WIN_SCORE)
        node = node.parent
def AI_vs_AI(n=3, mode='regular'):
    """Self-play driver: Minimax chooses every move until the game ends.

    Prints the board after each move; the loop runs while check_status()
    reports the game as still in progress (T.E).
    """
    board = Board()
    engine = Minimax()
    mark = T.X
    board.print_board()
    while board.check_status() == T.E:
        print('[play_minimax.py]:', T.num_to_symbol[board.check_status()])
        _, board = engine.find_next_move(board, mark, mode=mode)
        mark = T.opponent_of(mark)
        board.print_board()
def find_next_move(self, board, player):
    """Run Monte-Carlo tree search from `board` and pick the next move.

    Builds a fresh tree rooted at the current position (root state owned by
    the opponent, since it results from `player`'s last move), then repeats
    selection / expansion / simulation / backpropagation until the per-move
    time budget is exhausted. Returns (tree, board-after-chosen-move).
    """
    self.opponent = T.opponent_of(player)
    # Root node has no parent. The root's state is attributed to the
    # opponent so that children alternate correctly.
    # TODO: this is creating a node with children of the same number of moves in...
    tree = Tree(Node(State(board, self.opponent), None))
    root_node = tree.root
    root_node.children = []
    root_node.state.visit_count = 1
    reference_time = current_time()
    # Iterate until the wall-clock budget for one move is spent.
    while (current_time() - reference_time) <= (MonteCarloTreeSearch.time_limit_per_move):
        # -- Step 1 - Selection --
        # Defensive check: the root must never appear among its own children.
        if root_node in root_node.children:
            raise ValueError('Circular reference')
        promising_node = self.select_promising_node(root_node)
        # -- Step 2 - Expansion --
        # Only expand positions where the game is still in progress (T.E).
        if promising_node.state.board.check_status() == T.E:  # game in progress
            self.expand_node(promising_node)
        # -- Step 3 - Simulation --
        # Descend to a random child (if any) and play out a random game.
        exploration_node = promising_node
        if not promising_node.is_leaf():
            exploration_node = promising_node.get_random_child()
        # simulate random playout
        playout_result = self.simulate_random_playout(exploration_node)
        # -- Step 4 - Update --
        # NOTE(review): playout_result is passed where backpropogate expects
        # a `player` mark — presumably the winning mark; confirm.
        self.backpropogate(exploration_node, playout_result)
    # Best child by accumulated score becomes the new root / chosen move.
    winner_node = root_node.get_child_with_max_score()
    tree.root = winner_node
    return tree, winner_node.state.board
def AI_vs_AI(n=3):
    """Self-play driver: MCTS chooses every move until the game ends.

    Prints the board after each move; the loop runs while check_status()
    reports the game as still in progress (T.E).
    """
    board = Board()
    engine = MonteCarloTreeSearch()
    mark = T.X
    print('AI turn:')
    board.print_board()
    while board.check_status() == T.E:
        print('[play_mcts.py]:', T.num_to_symbol[board.check_status()])
        _, board = engine.find_next_move(board, mark)
        mark = T.opponent_of(mark)
        board.print_board()
def get_opponent(self):
    """Return the mark opposing this state's current player."""
    current = self.player
    return T.opponent_of(current)