def __init__(self, master=None, height=0, width=0):
    """Build the chess-board canvas, its move recorder, and the MCTS AI.

    Args:
        master: Parent Tk widget (None for the default root).
        height: Canvas height in pixels, forwarded to tkinter.Canvas.
        width: Canvas width in pixels, forwarded to tkinter.Canvas.
    """
    tkinter.Canvas.__init__(self, master, height=height, width=width)
    # Recorder tracking the sequence of moves played so far.
    self.step_record_chess_board = Record.Step_Record_Chess_Board()
    self.init_chess_board_points()   # compute the grid intersection points
    self.init_chess_board_canvas()   # draw the board onto the canvas
    self.board = MCTS.Board()
    # AI plays as player 2 on the shared board object.
    self.AI = MCTS.MonteCarlo(self.board, 2)
    self.clicked = 0
def __init__(self, master=None, height=0, width=0):
    """Build a 15x15 chess-board canvas, a move recorder, and two MCTS players.

    Args:
        master: Parent Tk widget (None for the default root).
        height: Canvas height in pixels, forwarded to tkinter.Canvas.
        width: Canvas width in pixels, forwarded to tkinter.Canvas.
    """
    # Fix: the Python 3 module name is lowercase "tkinter" ("Tkinter" is the
    # Python 2 name); the rest of this file already uses Python 3 syntax.
    tkinter.Canvas.__init__(self, master, height=height, width=width)
    self.step_record_chess_board = Record.Step_Record_Chess_Board()  # move recorder
    self.height = 15                 # board rows
    self.width = 15                  # board columns
    self.init_chess_board_points()   # compute the grid intersection points
    self.init_chess_board_canvas()   # draw the board onto the canvas
    self.board = MCTS.Board()
    self.n_in_row = 5                # stones in a row needed to win
    self.n_playout = 400             # num of simulations for each move
    self.c_puct = 5                  # presumably the PUCT exploration constant — TODO confirm
    # NOTE: self.board is shared by reference, so both MonteCarlo instances
    # below observe (and may mutate) the same board object.
    self.AI = MCTS.MonteCarlo(self.board, 1)
    self.AI_1 = MCTS.MonteCarlo(self.board, 0)
    self.clicked = 1
    self.init = True            # first placement is given by the user (later to be replaced by a random selection)
    self.train_or_play = True   # True - train, False - play
    self.step = 0
    self.text_id = None
def generate_games(self, episodes, snapshots, batch_size, sim_time=0,
                   rollouts_per_move=0, generate_random=False):
    """Generate self-play games, append them to the replay buffer, and train.

    Each episode plays one full game: MCTS rollouts produce a move
    distribution per position, (board, distribution) pairs are appended to
    the replay buffer, and the network is trained on a random minibatch
    after every game.

    Args:
        episodes: Number of games to generate.
        snapshots: Episode numbers after which the network is saved.
        batch_size: Minibatch size for training after each game.
        sim_time: Seconds of MCTS search per move.
        rollouts_per_move: Number of rollouts per move.
        generate_random: If True, moves come from a random actor rather than
            the current network.
    """
    # Typo fixed in the message below: "cahced" -> "cached".
    print("Net will be cached after the following episodes:", snapshots)
    # Always save the initial network as the episode-0 baseline.
    self.actor.save(self.name + "0")
    if generate_random:
        generator = actors.Random(self.state_manager)
    else:
        generator = self.actor
    for i in range(1, episodes + 1):
        board = self.state_manager.get_start()
        MC = MCTS.MonteCarlo(self.start_player, self.state_manager, generator)
        while True:
            # Do rollouts for sim_time seconds / rollouts_per_move rollouts.
            MC.search(sim_time=sim_time, simulations=rollouts_per_move)
            distribution = MC.get_move_distribution()
            # Record the pre-move position with its search distribution.
            self.replay_buffer.append((board, distribution))
            # Get the next state based on the rollouts.
            board, move = MC.best_move()
            winner = self.state_manager.winner(board)
            if self.verbose:
                self.state_manager.print_move(move)
                self.state_manager.print_board(board)
                print()
            # Re-root the search tree at the new state.
            MC.purge_tree(board)
            if winner != 0:
                break
        # NOTE(review): the original comment said "lock before accessing
        # shared actor" but no lock is actually taken here — confirm whether
        # concurrent training requires one.
        self.actor.train_network_random_minibatch(self.replay_buffer,
                                                  batch_size=batch_size)
        if i in snapshots:
            self.actor.save(self.name + str(i))
M = 10000 # Number of simulations per move in actual game N = 25 # Number of starting stones K = 15 # Max number of stones a player can pick win_game_count_p1 = 0 game_count = 0 for g in range(G): if P == 0: start_player = random.randint(1, 2) else: start_player = P s_m = state_manager_nim.state_manager_nim(N, K, start_player) MC = MCTS.MonteCarlo(start_player, s_m) while True: MC.search(M) best_state, move = MC.best_move() if verbose: # player that made move, is the player whose turn it's not print("Player ", 1 if best_state[0] == 2 else 2, " made move: ", move, " :: Current state: ", best_state[1], sep='') winner = s_m.winner(best_state) if winner != 0: game_count += 1 print(game_count, "of", G, "games done") if winner == 1: win_game_count_p1 += 1 if verbose: print("Player", winner, "wins!")