def Expand(self, node, game_state):
    """Expansion step: attach one randomly chosen untried move to ``node``.

    A move is drawn at random from ``node.untriedMoves`` and removed from
    that list. The move is applied through ``HexBoard.makeMove``; if ``node``
    already has a child for the resulting state, that child is returned,
    otherwise a new child of the opposite type is created and appended to
    ``node.children``.

    Returns:
        The child reached by the chosen move (existing or newly created).
    """
    # Draw the move and drop it from the pool of untried moves.
    picked = choice(node.untriedMoves)
    node.untriedMoves.remove(picked)

    # NOTE(review): only the local name is rebound here; whether the caller's
    # object changes depends on HexBoard.makeMove mutating in place - confirm.
    game_state = HexBoard.makeMove(picked, game_state)

    # Reuse the child if this position is already attached to the node.
    existing = node.getChild(game_state)
    if existing is not None:
        return existing

    # Children alternate type with their parent.
    opposite_type = 'MAX' if node.type == 'MIN' else 'MIN'
    fresh = Node(node_type=opposite_type,
                 board_state=game_state.board,
                 parent=node)
    node.children.append(fresh)
    return fresh
def MCTS_Player(hyperparameters, isMaximizer):
    """Play one MCTS move and store the new game and node in ``params``.

    ``hyperparameters`` is indexed as: [0] -> ``cp`` and [1] -> ``n`` for the
    ``MCTS`` constructor, [2] and [3] -> second and third positional
    arguments of ``MCTS.search``.
    """
    # Build the agent first, exactly as many hyperparameters as it needs.
    agent = MCTS(game=game,
                 cp=hyperparameters[0],
                 n=hyperparameters[1])

    # Run the search from the current node.
    start_node = params['node']
    search_limit = hyperparameters[2]
    time_budget = hyperparameters[3]
    move, params['node'] = agent.search(
        start_node, search_limit, time_budget, isMaximizer)

    # Apply the selected move and re-synchronise the stored node.
    next_game = HexBoard.makeMove(move, params['game'])
    params['game'] = next_game
    params['node'] = util.updateNode(params['node'], next_game)
def PlayervsComputer(self):
    """Run a game in which the human and the MCTS agent alternate moves.

    On even ``turn`` values the move comes from ``self.getReady``; on odd
    values it comes from ``MCTS.search``. After every move the output is
    cleared, the board is printed, and a winner message is printed once the
    game is over.
    """
    board = self.game
    self.clearOutput()
    board.print()

    # Search tree root, MCTS agent and the helper that keeps the tree in sync.
    tree = Node(node_type='MIN', board_state=board.board, parent=None)
    agent = MCTS(game=board, cp=0.8, n=100)
    helper = UTIL(infinity=np.inf,
                  maximizer=board.maximizer,
                  minimizer=board.minimizer)

    while not board.isGameOver():
        human_to_move = (board.turn % 2) == 0
        if human_to_move:
            chosen = self.getReady(board)
        else:
            print("Computer is thinking!!!")
            # 100 iterations at most, with a 10 second budget.
            chosen, tree = agent.search(tree, 100, delta=10, isMaximizer=True)
            print(f'best move: {chosen}')

        # Both players finish their turn the same way.
        board = HexBoard.makeMove(chosen, board)
        tree = helper.updateNode(tree, board)

        self.clearOutput()
        board.print()

        if board.isGameOver():
            # Blue is checked first; at most one message is printed.
            banners = (
                (board.BLUE, "!!! Blue Player Won !!!"),
                (board.RED, "!!! Red Player Won !!!"),
            )
            for colour, banner in banners:
                if board.checkWin(colour):
                    print(banner)
                    break
def IDTT_Player(hyperparameters, isMaximizer):
    """Play one iterative-deepening move, cached by board position.

    The move is looked up in ``IterativeDeepeningTranspositionTable`` under
    the byte representation of the current board. On a miss it is computed
    with ``util.iterativeDeepening`` (passing ``hyperparameters[0]`` and
    ``hyperparameters[1]``) followed by ``util.getBestMove``, then stored.
    The resulting game and node are written back into ``params``.
    """
    # Key is taken from a private copy of the board, before any search runs.
    board_key = params['node'].board.copy().tobytes()

    try:
        chosen = IterativeDeepeningTranspositionTable[board_key]
    except KeyError:
        # Cache miss: search, pick the move for the best value, remember it.
        current = params['node']
        score = util.iterativeDeepening(
            current, isMaximizer, hyperparameters[0], hyperparameters[1])
        chosen = util.getBestMove(params['node'], score)
        IterativeDeepeningTranspositionTable[board_key] = chosen

    # Apply the move and re-synchronise the stored node.
    next_game = HexBoard.makeMove(chosen, params['game'])
    params['game'] = next_game
    params['node'] = util.updateNode(params['node'], next_game)
def search(self, root, itermax, delta, isMaximizer):
    """Run Monte Carlo Tree Search from ``root`` and return the most visited move.

    Every iteration performs selection, expansion, ``self.N`` random playouts
    and backpropagation. Iterations stop once ``itermax`` of them have run or
    ``delta`` seconds have elapsed, whichever happens first.

    Args:
        root: Node to search from; it and its descendants are updated in place.
        itermax: Maximum number of iterations.
        delta: Time budget in seconds.
        isMaximizer: Forwarded to ``self.UCTSelectChild`` during selection.

    Returns:
        A ``(move, root)`` tuple. ``move`` is ``HexBoard.getMove`` from
        ``root.board`` to the board of the child with the highest ``visit``
        count (the last such child on ties).

    Raises:
        IndexError: If ``root`` has no children when the search ends (for
            example when no iteration was run).
    """
    end_time = datetime.now() + timedelta(seconds=delta)

    while datetime.now() < end_time and itermax > 0:
        # Every iteration starts from a fresh copy so self.game is untouched.
        game_state = deepcopy(self.game)
        node = root
        # Board keys from the root down to the current node; used to find
        # the right parent on the way back up.
        path = [root.board.tobytes()]

        # Select: descend while the node is fully expanded and non-terminal.
        while (not node.untriedMoves and node.children
               and not game_state.isTerminal()):
            node = self.UCTSelectChild(node, isMaximizer)
            m = HexBoard.getMove(game_state.board, node.board)
            game_state = HexBoard.makeMove(m, game_state)
            path.append(node.board.tobytes())

        # Expand: add (or reuse) one child for an untried move.
        # NOTE(review): Expand rebinds only its own local game_state; the
        # playouts below start from the expanded position only if
        # HexBoard.makeMove mutates its argument in place - confirm.
        if node.untriedMoves and not game_state.isTerminal():
            node = self.Expand(node, game_state)
            path.append(node.board.tobytes())

        # Playouts: self.N random games from the same position. Only this
        # iteration's results are collected here.
        playout_wins = 0
        playout_losses = 0
        for _ in range(self.N):
            _game = deepcopy(game_state)
            while not _game.isTerminal():
                move = choice(HexBoard.getFreeMoves(_game.board))
                _game = HexBoard.makeMove(move, _game)
            if _game.checkWin(_game.maximizer):
                playout_wins += 1
            else:
                playout_losses += 1

        # Backpropagate from the current node up to the root.
        # Only this iteration's playout results are added at each level.
        # Adding a node's cumulative totals to its parent would count older
        # playouts again on every pass through that node.
        path.pop()  # drop the current node's own key; ancestors remain
        while node is not None:
            # np.inf marks statistics that have never been written.
            if node.wins == np.inf:
                node.wins = 0
            if node.loss == np.inf:
                node.loss = 0
            node.wins += playout_wins
            node.loss += playout_losses
            node.visit += 1
            node = node.getParent(path.pop()) if path else None

        itermax -= 1

    # Return the move leading to the most visited child.
    sortedList = sorted(root.children, key=lambda c: c.visit)
    return HexBoard.getMove(root.board, sortedList[-1].board), root