def MCTS_Search(board, player, num_reads, n_net):
    """Run ``num_reads`` MCTS simulations from ``board`` and return the root.

    Every simulation selects a leaf with ``root.select_leaf()``, evaluates it
    with ``n_net`` and backs the returned value up the tree.  Leaves that are
    not terminal are first expanded with the network's prior probabilities.

    Args:
        board: Position to search from; handed to ``Node`` and
            ``checkers.get_all_moves`` unchanged.
        player: Side to move at ``board``.
        num_reads: Number of simulations to run.
        n_net: Callable taking ``checkers.get_state2(board, player)`` and
            returning ``(child_prior_prob, value)``.  ``child_prior_prob``
            must support ``.cpu().detach().numpy()`` (presumably a torch
            tensor -- confirm against the network definition).

    Returns:
        The root ``Node`` after all simulations have been applied.
    """
    root = Node(board,
                checkers.get_all_moves(board, player),
                player,
                move=None,
                parent=ParentRootNode())

    for _ in range(num_reads):
        leaf = root.select_leaf()
        child_prior_prob, value = n_net(
            checkers.get_state2(leaf.board, leaf.player))

        # Terminality has to be judged on the selected leaf's own position.
        # Testing the root board here would give the same answer on every
        # simulation and say nothing about the leaf being evaluated.
        leaf_is_terminal = (
            checkers.isTerminal(leaf.board)
            or len(checkers.get_all_moves(leaf.board, leaf.player)) == 0)

        if leaf_is_terminal:
            # Nothing to expand: only propagate the evaluation.
            print("Finished Game")
            leaf.backpropagate(value)
            leaf.print_tree()
        else:
            # Flatten the network output to a 1-D array of priors before
            # handing it to the node.
            priors = child_prior_prob.cpu().detach().numpy().reshape(-1)
            leaf.expand_and_evaluate(priors)
            leaf.backpropagate(value)

    root.print_tree()
    return root
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Generate training samples from ``num_games`` self-play games.

    Each move is chosen by running ``MCTS_Search`` with ``roll_out``
    simulations and taking the arg-max of the policy returned by
    ``get_policy``.  A game stops when ``checkers.isTerminal`` reports
    ``True``, when the side to move has no legal moves, or when the move
    counter reaches 150.

    Args:
        nnet: Network passed through to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: First game index; only shifts the range that is iterated.
        iteration: Not used by this implementation.

    Returns:
        A list of ``[state, policy, value]`` entries where ``state`` is
        ``checkers.get_state2(board, player)``.  The first position of every
        game is labelled ``0``; all later positions carry the game outcome
        (``-1`` if player 1 was left without moves, ``1`` if player 2 was,
        ``0`` otherwise).
    """
    samples = []
    for _game in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        history = []
        outcome = 0
        moves_played = 0
        temperature = 1

        while checkers.isTerminal(board, player) is not True:
            root = MCTS_Search(board, player, roll_out, nnet)
            policy = get_policy(root, temperature)
            history.append([board, player, policy])

            chosen = root.possible_moves[np.argmax(policy)]
            board = checkers.apply_move(root.board, chosen[0], chosen[1],
                                        root.player)
            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces; the side that cannot move
                # determines the sign of the outcome.
                outcome = -1 if player == 1 else (1 if player == 2 else 0)
                break
            if moves_played == 150:
                # Move cap reached: record the game as undecided.
                outcome = 0
                break
            moves_played += 1

        for ind, (position, mover, pi) in enumerate(history):
            label = 0 if ind == 0 else outcome
            samples.append([checkers.get_state2(position, mover), pi, label])

    return samples
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Play ``num_games`` self-play games and save each game's samples.

    Moves are chosen by running ``MCTS_Search`` with 500 simulations and
    taking the arg-max of the policy from ``get_policy``.  The second
    argument to ``get_policy`` starts at ``1`` and drops to ``0.1`` once more
    than 15 moves have been played.  A game ends when the position is
    terminal, when the side to move has no legal moves, or when the move
    counter reaches 150.

    Args:
        nnet: Network passed through to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: Index of the first game; used in the output file names.
        iteration: Training iteration number; used in the output file names.

    Returns:
        None.  For every game a list of ``[board, player, policy, value]``
        entries is written with ``save_data`` to
        ``MCTS_iteration-<iteration>_game-<game index>.p``.  The first
        position of each game is labelled ``0``; later positions carry the
        game outcome.
    """
    for itt in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        data = []
        value = 0
        num_moves = 0
        t = 1

        # Play until the position is terminal.  Looping *while* terminal
        # would never play a move from the initial position.
        while not checkers.isTerminal(board):
            if num_moves > 15:
                t = 0.1
            root = MCTS_Search(board, player, 500, nnet)
            policy = get_policy(root, t)
            data.append([board, player, policy])

            chosen = root.possible_moves[np.argmax(policy)]
            board = checkers.apply_move(root.board, chosen[0], chosen[1],
                                        root.player)
            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces.
                # NOTE(review): the other MCTS_self_play definition in this
                # file assigns the opposite sign here -- confirm which
                # convention the training code expects.
                if player == 1:
                    value = 1
                elif player == 2:
                    value = -1
                else:
                    value = 0
                # The game is decided; searching a position without legal
                # moves would yield an empty policy for np.argmax.
                break
            if num_moves == 150:
                # Move cap reached: record the game as undecided.
                value = 0
                break
            num_moves += 1

        data_x = [[s, pl, po, 0 if ind == 0 else value]
                  for ind, (s, pl, po) in enumerate(data)]
        filename = "MCTS_iteration-{:d}_game-{:d}.p".format(iteration, itt)
        save_data(filename, data_x)
def MCTS_Play_WithRandom(nnet, num_games):
    """Play the MCTS agent (player 1) against a random mover (player 2).

    Player 1 moves first and picks the arg-max of the policy produced by
    ``MCTS_Search`` with ``roll_out`` simulations; player 2 plays whatever
    ``checkers.get_random_move`` returns.

    Args:
        nnet: Network passed through to ``MCTS_Search``.
        num_games: Number of games to play.

    Returns:
        ``(wins, draws)``.  A win is counted when player 2 is the side left
        without legal moves; a draw is counted when the move counter reaches
        200.  Games that stop because ``checkers.isTerminal`` returns ``True``
        add to neither counter.
    """
    wins = 0
    draws = 0
    for _game in tqdm(range(num_games)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        moves_played = 0
        temperature = 1

        while checkers.isTerminal(board, player) is not True:
            if player == 1:
                # Network-guided side: search, then play the top policy move.
                root = MCTS_Search(board, player, roll_out, nnet)
                policy = get_policy(root, temperature)
                chosen = root.possible_moves[np.argmax(policy)]
                board = checkers.apply_move(root.board, chosen[0], chosen[1],
                                            root.player)
            else:
                # Baseline side: play a random move.
                random_move = checkers.get_random_move(board, player)
                board = checkers.apply_move(board, random_move[0],
                                            random_move[1], player)

            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces; player 2 being stuck is a
                # win for the MCTS side.
                if player == 2:
                    wins += 1
                break
            if moves_played == 200:
                draws += 1
                break
            moves_played += 1

    return wins, draws