def make_training_move(board_state, side):
    """Choose a move for the player `side` during self-play training and
    record the (state, move) pair into the mini-batch buffers.

    board_state: current flat board representation.
    side: +1 or -1, the player to move; its sign tags whose state is stored.
    Returns: the chosen move as a tuple via game_spec.flat_move_to_tuple.
    """
    if cnn_on:
        # We must have the first 3x3 board as first 9 entries of the list,
        # second 3x3 board as next 9 entries etc.
        # This is required for the CNN. The CNN takes the first 9 entries
        # and forms a 3x3 board etc.
        """If the 10 split 3x3 boards are desired, use create_3x3_board_states(board_state) here"""
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
        # NOTE(review): entries > 1 are zeroed — presumably markers (e.g. for
        # decided sub-boards) that the plain network should not see; confirm.
        np_board_state[np_board_state > 1] = 0
    mini_batch_board_states.append(
        np_board_state * side
    )  # append all states are used in the minibatch (+ and - determine which player's state it was)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Epsilon-greedy exploration: play a random valid move with probability eps.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states is saved the same way it should enter the
        # neural net (the adapted board state), the same should happen for the
        # mini batch moves
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def make_training_move(board_state, side):
    """Choose a move for the player `side` during self-play training and
    record the (state, move) pair into the mini-batch buffers.

    NOTE(review): near-duplicate of the make_training_move variant above, but
    this one does NOT zero out board values > 1 in the non-CNN branch —
    confirm whether that difference is intentional.
    """
    if cnn_on:
        # Re-order the flat board so each 3x3 sub-board is contiguous, as the
        # CNN input layer expects.
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
    # Sign of `side` marks which player's state this was in the mini-batch.
    mini_batch_board_states.append(np_board_state * side)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Epsilon-greedy exploration: play a random valid move with probability eps.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states is saved the same way it should enter the
        # neural net (the adapted board state), the same should happen for the
        # mini batch moves
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def player_func(board_state, side):
    """Select a move for evaluation play.

    Uses Monte-Carlo tree search when `mcts` is enabled, otherwise the
    deterministic network policy. Returns the move as a board tuple.
    """
    if mcts:
        _, chosen = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                            session, input_layer, output_layer,
                                            True, cnn_on, True)
    else:
        chosen = get_deterministic_network_move(session, input_layer, output_layer,
                                                board_state, side,
                                                valid_only=True, game_spec=game_spec)
    # MCTS may hand back a plain sequence; argmax needs an ndarray, and the
    # move layout differs from the CNN's learning layout.
    flat_index = np.asarray(chosen).argmax()
    return game_spec.flat_move_to_tuple(flat_index)
def make_move_historical(net, board_state, side):
    """Choose a move using a historical network snapshot.

    net: pair (input_layer, output_layer) of the historical network.
    board_state: current flat board representation.
    side: +1 or -1, the player to move.
    Returns: the chosen move as a tuple via game_spec.flat_move_to_tuple.

    Improvement: removed commented-out dead code (the unused deterministic
    alternative).
    NOTE(review): the MCTS branch searches with the *current* network's
    input_layer/output_layer rather than net[0]/net[1] — confirm this is
    intentional for historical play.
    """
    if mcts:
        _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                          session, input_layer, output_layer,
                                          True, cnn_on, True)
    else:
        move = get_stochastic_network_move(session, net[0], net[1],
                                           board_state, side,
                                           valid_only=True, game_spec=game_spec,
                                           cnn_on=cnn_on)
    # move must be an array; mcts doesn't return one.
    move_for_game = np.asarray(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def test_move(self):
    """Smoke-test a single step of the Monte-Carlo tree search.

    Builds a board with 10 random moves, prints it, runs one Monte-Carlo
    sample and a 100-sample tree search, printing the results. No assertions
    are made — this only checks the calls run without error.
    """
    # Test single move of monte carlo tree search algorithm
    game_spec = ut.UltimateTicTacToeGameSpec()
    # generate board with 10 random moves
    random_func = game_spec.get_random_player_func()
    board_state = ut._new_board()
    side = 1
    for _ in range(10):
        move = random_func(board_state, side)
        board_state = game_spec.apply_move(board_state, move, side)
        side = -1 * side
    print("")
    ut.print_board_state(board_state, side)
    result, move = mc._monte_carlo_sample(game_spec, board_state, side)
    print("result: ", result)
    print("move: ", move)
    # NOTE(review): mc_func is assigned but never used below — dead code,
    # unless get_monte_carlo_player_func() has a side effect; confirm.
    mc_func = game_spec.get_monte_carlo_player_func()
    result, move = mc.monte_carlo_tree_search(game_spec, board_state, side, 100)
    print(result)
    print(move)
def monte_carlo_player(self, board_state, side, uct, number_of_samples):
    """Return a move chosen by Monte-Carlo tree search.

    Uses the UCT variant when `uct` is true, plain MCTS otherwise, running
    `number_of_samples` playouts. The search's result value is discarded;
    only the move is returned.
    """
    search = mc.monte_carlo_tree_search_uct if uct else mc.monte_carlo_tree_search
    _, selected_move = search(self, board_state, side, number_of_samples)
    return selected_move