def best_move_mcts(self):
    '''
    Used if the game is not interactive.
    Returns the best move to play according to a Monte Carlo tree search.
    Parameters: self.n_sim is the number of simulations run by the MCTS algorithm.
    '''
    # Transform the current board into a 2D numpy array.
    current_state = self.board_transformation()
    initial_board_state = TicTacToeGameState(state=current_state, next_to_move=1)
    # Define the root of the Monte Carlo tree search, i.e. the current state.
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial_board_state)
    # Perform MCTS; the best child's state holds the 2D array after the optimal move.
    mcts = MonteCarloTreeSearch(root)
    new_state = mcts.best_action(self.n_sim).state.board
    # new_state and current_state differ at exactly one element; extract the
    # position of that element and convert it into a number between 1 and 9.
    new_move = np.argmax((new_state - current_state).reshape(1, 9)) + 1
    assert new_move in np.arange(1, 10)
    return new_move
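# The argmax-of-difference trick above is worth isolating: the old and new
# boards differ only at the cell where the mark was placed, so argmax on the
# flattened difference recovers that cell's index. A minimal, self-contained
# sketch with hypothetical boards:
import numpy as np

current_state = np.zeros((3, 3))
new_state = current_state.copy()
new_state[1, 2] = 1  # player 1 marks row 1, column 2

# Flat index of (1, 2) in row-major order is 1 * 3 + 2 = 5; +1 maps it to 1-9.
new_move = np.argmax((new_state - current_state).reshape(1, 9)) + 1
assert new_move == 6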
def test_tic_tac_toe_best_action():
    state = np.zeros((10, 10))
    initial_board_state = TicTacToeGameState(state=state, next_to_move=1)
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial_board_state, parent=None)
    mcts = MonteCarloTreeSearch(root)
    return mcts.best_action(1000)
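# As written, the test above only exercises best_action and returns the node.
# Assuming the node exposes its expanded children via a `children` attribute
# (as mctspy's MonteCarloTreeSearchNode does), a variant that actually asserts
# something could look like this sketch:
def test_tic_tac_toe_best_action_returns_child_of_root():
    state = np.zeros((10, 10))
    initial_board_state = TicTacToeGameState(state=state, next_to_move=1)
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial_board_state, parent=None)
    mcts = MonteCarloTreeSearch(root)
    best_node = mcts.best_action(1000)
    assert best_node in root.children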
def get_move(self, state):
    # Phase 1: the searchable state is built from an assumption about
    # information not directly available in `state`.
    if state.get_phase() == 1:
        initial_board_state = state.make_assumption()
    else:
        initial_board_state = state
    root = TwoPlayersGameMonteCarloTreeSearchNode(initial_board_state)
    mcts = MonteCarloTreeSearch(root)
    best_child = mcts.best_action(6500)
    best_move = best_child.move_played
    self.print_children_values(best_child)
    return best_move
def get_move(self, state):
    if state.get_phase() == 1:
        # Search from an assumed concrete state, timing how long MCTS takes.
        initial_board_state = state.make_assumption()
        root = Node(initial_board_state)
        mcts = MonteCarloTreeSearch(root)
        start_time = time.time()
        best_move = mcts.best_move(5000)
        end_time = time.time()
        print(end_time - start_time)
        return best_move
    else:
        val, move = self.value(state)
        return move
def play_mcts(state, num_simulations=None, total_simulation_seconds=1):
    current_player = state._current_player
    tiles_by_player = state._tiles_by_player
    # Rotate the tile lists so the current player sits at index 0.
    aux_state = DominoState(0, {
        'tiles_by_player': rotate(tiles_by_player, current_player),
        'suits_at_ends': state._suits_at_ends
    })
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=DominoGameState(aux_state))
    mcts = MonteCarloTreeSearch(root)
    best_action = mcts.best_action(
        simulations_number=num_simulations,
        total_simulation_seconds=total_simulation_seconds).state._state.action
    return state.next_state_from_action(
        DominoAction(current_player, best_action.tile, best_action.suit_played))
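# The rotate helper is not defined in the snippet above; assuming it simply
# cycles the per-player tile lists so that index k becomes index 0 (putting
# the current player first), a minimal sketch would be:
def rotate(items, k):
    # rotate(['a', 'b', 'c'], 1) -> ['b', 'c', 'a']
    return items[k:] + items[:k]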
def monte_carlo_best_card(n, seconds, best_previous, deck, cards_on_table,
                          my_points, my_cards, my_total_points, my_taken_cards,
                          opp_points, opp_cards, opp_total_points,
                          opp_taken_cards, who_is_first, last_taken_by):
    '''Returns the index of the best card to play according to a Monte Carlo simulation.'''
    initial = ZingGameState(best_previous, deck, cards_on_table, my_points,
                            my_cards, my_total_points, my_taken_cards,
                            opp_points, opp_cards, opp_total_points,
                            opp_taken_cards, who_is_first, last_taken_by)
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial)
    mcts = MonteCarloTreeSearch(root)
    best_node = mcts.best_action(n, seconds)
    move = best_node.state.previous_move
    # print('Deck length at best move: ' + str(len(best_node.state.deck)))
    # print('Cards available at best move')
    # print_cards_inline(best_node.state.my_cards)
    return move
deck = shuffle_deck(create_deck())
#cards_on_table = list()
card_underneath = deck[-1]
who_is_first = 2
cards_on_table = deck[:4]
deck = deck[4:]
my_cards, opp_cards, deck = deal_cards(who_is_first, deck)
my_points, opp_points = 0, 0
my_total_points, opp_total_points = 0, 0  # assumed initial totals; not set in the original snippet
my_taken_cards = list()
opp_taken_cards = list()
last_taken_by = 0

initial = ZingGameState(None, deck, cards_on_table, my_points, my_cards,
                        my_total_points, my_taken_cards, opp_points, opp_cards,
                        opp_total_points, opp_taken_cards, who_is_first,
                        last_taken_by)
root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial)
mcts = MonteCarloTreeSearch(root)

t1 = time.time()
best_node = mcts.best_action(None, 0.5)
t2 = time.time()
print(t2 - t1)
move = best_node.state.previous_move
# Randomize the first move.
cathedralLegalMoves = game.getLegalMoves()
index = np.random.randint(len(cathedralLegalMoves))
initMove = cathedralLegalMoves[index]
game.placePiece(initMove[0], initMove[1], initMove[2])

state = None
while keepGoing:
    # Alternate which player uses the NN and which strictly uses MCTS.
    root = NeuralNetNode(CathedralState(game), clfRef,
                         oneSided=(Cathedral.lightPlayer if g % 2 == 0 else Cathedral.darkPlayer))
    mcts = MonteCarloTreeSearch(root)
    best_node = mcts.best_action(mctsIterations)
    state = best_node.state
    game = state.game
    game.printIds()
    keepGoing = not best_node.state.game.isGameOver()

inputData = state.raw(None)
label = state.game_result
# print("light: ", game.getScore(Cathedral.lightPlayer))
# print("light remaining", game.lightPiecesLeft)
# print("dark: ", game.getScore(Cathedral.darkPlayer))
# print("dark remaining", game.darkPiecesLeft)
with open(path.join(thisPath, 'stats', gameFile), "a+") as f:
def mcts_decision(state, num_simulations=None, total_simulation_seconds=1):
    root = TwoPlayersGameMonteCarloTreeSearchNode(state=DominoGameState(state))
    mcts = MonteCarloTreeSearch(root)
    return mcts.best_action(
        simulations_number=num_simulations,
        total_simulation_seconds=total_simulation_seconds).state._state
# Copied from README
import numpy as np

from mctspy.tree.nodes import TwoPlayersGameMonteCarloTreeSearchNode
from mctspy.tree.search import MonteCarloTreeSearch
from mctspy.games.examples.tictactoe import TicTacToeGameState

state = np.zeros((3, 3))
initial_board_state = TicTacToeGameState(state=state, next_to_move=1)
root = TwoPlayersGameMonteCarloTreeSearchNode(state=initial_board_state)
mcts = MonteCarloTreeSearch(root)
best_node = mcts.best_action(10000)
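# Follow-up: as in the snippets above, the returned node wraps the game state
# reached after the chosen move, so the updated board can be read back from it
# (next_to_move is assumed to be exposed as an attribute, mirroring the
# constructor argument).
print(best_node.state.board)         # 3x3 array with a single non-zero cell
print(best_node.state.next_to_move)  # whose turn it is after the move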