import random
import time

import numpy as np


def rollout(self, node):
    # Play uniformly random moves from this node's position until someone
    # wins or the board fills up; returns the winner, or 0 for a draw.
    rollout_mat = np.copy(node.game_state)
    zeros = np.where(rollout_mat == 0)
    unvisited_node_list = list(zip(zeros[0], zeros[1]))
    random.shuffle(unvisited_node_list)
    board_full = len(unvisited_node_list) == 0
    if node.move is not None:
        have_winner = check_for_win(rollout_mat, node.move)
    else:
        have_winner = None
    cur_player = node.player
    while have_winner is None and not board_full:
        cur_player *= -1
        move = unvisited_node_list.pop()
        rollout_mat[move[0]][move[1]] = cur_player
        # Check whether the board is full
        board_full = len(unvisited_node_list) == 0
        have_winner = check_for_win(rollout_mat, move)
    # A winning final move takes precedence over the board being full,
    # so check for a winner before declaring a draw.
    if have_winner is not None:
        return have_winner
    return 0
def policy_rollout(self, rollout_mat, move, player):
    # Random rollout used by the AlphaGomoku agent; mutates rollout_mat
    # in place and returns the winner, or 0 for a draw.
    zeros = np.where(rollout_mat == 0)
    unvisited_node_list = list(zip(zeros[0], zeros[1]))
    random.shuffle(unvisited_node_list)
    board_full = len(unvisited_node_list) == 0
    if move is not None:
        have_winner = check_for_win(rollout_mat, move)
    else:
        have_winner = None
    cur_player = player
    while have_winner is None and not board_full:
        cur_player *= -1
        move = unvisited_node_list.pop()
        rollout_mat[move[0]][move[1]] = cur_player
        # Fast model rollout (disabled)
        # X_input = self._rollout_encoder.encode(rollout_mat, cur_player, move)
        # position_priority = list(np.argsort(self._rollout_model.predict(X_input))[0][::-1])
        # for position in position_priority:
        #     i = position // 8
        #     j = position % 8
        #     if rollout_mat[i][j] == 0:
        #         break
        # rollout_mat[i][j] = cur_player
        # Check whether the board is full
        board_full = len(unvisited_node_list) == 0
        have_winner = check_for_win(rollout_mat, move)
    # As in rollout(), a winning final move outranks a full board.
    if have_winner is not None:
        return have_winner
    return 0
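# Note: the commented-out "fast model rollout" block above would replace the
# uniformly random move with the top-ranked legal square from a small rollout
# policy network (self._rollout_model), presumably a cheaper stand-in for the
# deep policy network, in the spirit of AlphaGo's fast rollout policy.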
def select_move(self, mat, move):
    cur_time = time.time()
    root = MCTSNode(mat, self.cur_player, move=move)
    simulation_num = 0
    # Run simulations within a 10-second budget
    # (alternative: for i in range(self.simulation_number)).
    while time.time() - cur_time < 10:
        node = root
        # Step 1: Selection - descend while the node is fully expanded
        # and not a terminal position.
        while (node.move is not None and not node.can_add_child()
               and check_for_win(node.game_state, node.move) is None):
            node = self.select_child(node)
        # Step 2: Expansion
        if node.can_add_child():
            node = node.expansion()
        # Step 3: Rollout and get the winner
        winner = self.rollout(node)
        # Step 4: Back-propagate to update the scores along the path
        while node is not None:
            if node.player == winner:
                node.win_num += 1
            if winner == 0:  # a draw counts as half a win for both sides
                node.win_num += 0.5
            node.visit_num += 1
            node = node.parent
        simulation_num += 1

    visited_num_mat = np.zeros((8, 8))
    winning_num_mat = np.zeros((8, 8))
    winning_percent_mat = np.zeros((8, 8))
    best_move_mat = None
    best_move = None
    best_visit_num = -1.0
    for child in root.children:
        move = child.move
        visited_num_mat[move[0]][move[1]] = child.visit_num
        winning_percent_mat[move[0]][move[1]] = np.round(
            child.winning_percent(), 3)
        winning_num_mat[move[0]][move[1]] = child.win_num
        # The most-visited child is the most robust choice of move.
        if child.visit_num > best_visit_num:
            best_visit_num = child.visit_num
            best_move_mat = child.game_state
            best_move = child.move
    pprint_tree(root)
    return visited_num_mat, simulation_num
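# A minimal sketch of the child selection that select_move() relies on,
# assuming the standard UCT rule. The names select_child, visit_num and
# winning_percent() follow the code above, but the exploration weight
# self.temperature is an assumed parameter, not from the original.
import math

def select_child(self, node):
    total_visits = sum(child.visit_num for child in node.children)
    log_visits = math.log(total_visits)

    def uct_score(child):
        # Exploitation (observed win rate) plus an exploration bonus that
        # shrinks as the child accumulates visits.
        exploration = math.sqrt(log_visits / child.visit_num)
        return child.winning_percent() + self.temperature * exploration

    return max(node.children, key=uct_score)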
def select_move(self, mat, move):
    time_start = time.time()
    self.root = AlphaGomokuNode(mat, self.cur_player, move=move)
    for simulation in range(self.simulation_number):
        current_state = mat
        node = self.root
        # Selection/expansion: walk down the tree for at most self.depth plies.
        for depth in range(self.depth):
            if not node.children:
                if check_for_win(node.game_state, node.move) is not None:
                    break  # terminal position, nothing to expand
                moves, probabilities = self.policy_probabilities(
                    current_state, move)
                node.expand_children(moves, probabilities)
            move, node = node.select_child()
            current_state = np.copy(node.game_state)
            current_state[move[0]][move[1]] = node.player * -1
        # Leaf evaluation: blend the value network's prediction with a
        # rollout result, weighted by lambda as in AlphaGo.
        current_state_input = self._deep_encoder.encode(
            current_state, node.player, move)
        value = self._value_model.predict(current_state_input)[0][0]
        if node.player == -1:
            value *= -1  # align the sign of the value with the player
        rollout = self.policy_rollout(current_state, move, node.player)
        weighted_value = (
            1 - self.lambda_value) * value + self.lambda_value * rollout
        node.update_values(weighted_value)
    # Play the most-visited child of the root.
    move = max(self.root.children,
               key=lambda move: self.root.children[move].visit_count)
    mat[move[0]][move[1]] = self.cur_player
    pprint_tree(self.root)
    print("Total time spent", time.time() - time_start)
    return mat, move
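# A hedged sketch of policy_probabilities(): score every square with a
# policy network, keep only the legal (empty) ones, and renormalize.
# self._policy_model is an assumed attribute (only _value_model and
# _rollout_model appear above); the 8x8 board size follows select_move().
def policy_probabilities(self, state, last_move):
    X_input = self._deep_encoder.encode(state, self.cur_player, last_move)
    raw = self._policy_model.predict(X_input)[0]  # one score per square
    moves, probabilities = [], []
    for index, p in enumerate(raw):
        i, j = index // 8, index % 8
        if state[i][j] == 0:  # only empty squares are legal moves
            moves.append((i, j))
            probabilities.append(p)
    total = sum(probabilities)
    return moves, [p / total for p in probabilities]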
def best_child(node):
    # Prefer a child whose position is already won; check_for_win takes the
    # board and the last move, matching its other call sites above.
    for move, child in node.children.items():
        if check_for_win(child.state, move) == -1:
            return child
    # Otherwise fall back to the most-visited child (visit_count matches
    # the attribute used in select_move above).
    return max(node.children.values(), key=lambda x: x.visit_count)
def is_terminal(self):
    # check_for_win returns None while the game is still in progress, so any
    # non-None result (win, loss, or draw) marks a terminal position.
    result, _ = check_for_win(self.game_state, self.move)
    return result is not None
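# For reference, a minimal sketch of the node attributes the MCTS methods
# above assume; the constructor signature follows the call
# MCTSNode(mat, self.cur_player, move=move) in select_move. The real class
# also implements can_add_child() and expansion(), which are omitted here.
class MCTSNode:
    def __init__(self, game_state, player, move=None, parent=None):
        self.game_state = game_state  # board matrix, 0 marks empty squares
        self.player = player          # player who made self.move (1 or -1)
        self.move = move              # (row, col) that produced this state
        self.parent = parent
        self.children = []
        self.win_num = 0.0            # wins for self.player; draws add 0.5
        self.visit_num = 0

    def winning_percent(self):
        return self.win_num / self.visit_num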