def play_web_game(is_stop, player1: Player, player2: Player, turn_to,
                  send_board_step, send_player1_running, send_player2_running,
                  wait_human_action, game_over):
    """
    Run one web game loop until the game ends or a stop is requested.

    :param is_stop: Callable returning True when the game should abort.
    :param player1: First player; moves when it is BOARD.o's turn.
    :param player2: Second player.
    :param turn_to: Callback notified whose turn it is.
    :param send_board_step: Callback (player_no, action) reporting an AI move.
    :param send_player1_running: Progress callback for player1's search.
    :param send_player2_running: Progress callback for player2's search.
    :param wait_human_action: Callable (player_no, is_stop) -> action that
        blocks until the human submits a move.
    :param game_over: Callback (winner) invoked when the game ends.
    """
    board = Board()

    def _one_turn(player, player_no, send_running):
        # One player's move; returns False when the loop must abort
        # (stop was requested while waiting for human input).
        if isinstance(player, Human):
            action = wait_human_action(player_no, is_stop)
            if is_stop():
                return False
            board.step(action)
        else:
            action = player.take_action(board, is_output_action=False,
                                        running_output_function=send_running,
                                        is_stop=is_stop)
            send_board_step(player_no, action)
        return True

    while not is_stop():
        turn_to(board.current_player)
        # Originally the two branches duplicated the whole per-turn logic;
        # it is now shared via _one_turn.
        if board.current_player == BOARD.o:
            if not _one_turn(player1, 1, send_player1_running):
                return
        else:
            if not _one_turn(player2, 2, send_player2_running):
                return
        is_over, winner = board.result()
        if is_over:
            game_over(winner)
            return
def rollout_policy(self, board: Board):
    """
    Rollout decision: play one uniformly random available move.

    :param board: The board, mutated by the random step.
    """
    candidates = list(board.available_actions)
    board.step(random.choice(candidates))
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Run a fresh tree search and play the best move found.

    :param board: Current board.
    :param is_output_action: Whether to output action information.
    :param running_output_function: running output function.
    :param is_stop: Ask whether to stop.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print(f"It's turn to {self.name}, AI player.")
        print("Thinking ASAP ...")
    # Search from a clean tree every turn.
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    # c=0: pure exploitation when committing to the move.
    best_move, _ = self.root.choose_best_child(0)
    board.step(best_move)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        print(f"AI player {self.name} moves ({best_move[0]}, {best_move[1]})")
    return best_move
def rollout_policy(self, board: Board):
    """
    Rollout decision: play one uniformly random available move.

    Cleaned up: the original collected datetime deltas into an unused
    local (`time`) and built a uniform probability vector that never
    influenced the draw (and documented a nonexistent `node` parameter);
    the dead profiling and unused probability code were removed.

    :param board: The board, mutated by the random step.
    """
    actions = list(board.available_actions)
    # Uniform draw over all available actions (np.random kept so the
    # class keeps using a single RNG source).
    action = actions[np.random.choice(len(actions))]
    board.step(action)
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Run a fresh tree search and play the best move found.

    :param board: Current board.
    :param is_output_action: Whether to output action information.
    :param running_output_function: running output function.
    :param is_stop: Ask whether to stop.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        # Fixed: message printed the player name twice
        # ("{0} It's turn to {0} ..."), a leftover of a stripped translation.
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        # Fixed: stray "IA {0} (...)" prefix removed for consistency with
        # the other English take_action implementations.
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Run a fresh tree search and play the best move found.

    :param board: Current board.
    :param is_output_action: Whether to output action information.
    :param running_output_function: running output function.
    :param is_stop: Ask whether to stop.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print(f"该 {self.name} 落子了,它是 AI 选手。 It's turn to {self.name}, AI player.")
        print("思考中。。。 Thinking...")
    # Start each turn from a clean search tree.
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    # c=0: commit to the best child without exploration.
    move, _ = self.root.choose_best_child(0)
    board.step(move)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        print(f"AI 选手 {self.name} 落子于 ({move[0]}, {move[1]})\n"
              f"AI player {self.name} moves ({move[0]}, {move[1]})")
    return move
def traverse(self, node: TreeNode, board: Board):
    """
    Descend to a leaf, then either score a terminal position or expand
    the leaf via the policy-value function.

    :param node: Node to start the descent from.
    :param board: Board mutated along the chosen path.
    :return: (leaf_node, value) — the reached node and the value to
        backpropagate (+1/-1/0 for terminal states, otherwise the
        evaluation returned by the policy-value function).
    """
    # Greedy descent until a childless node is reached.
    while node.children:
        step, node = node.choose_best_child(c=self.greedy_value)
        board.step(step)

    is_over, winner = board.result()
    if is_over:
        # Score the terminal state relative to the player to move.
        if winner == board.current_player:
            leaf_value = 1.0
        elif winner == -board.current_player:
            leaf_value = -1.0
        else:
            leaf_value = 0.0
        return node, leaf_value

    # Non-terminal leaf: expand children from the policy head.
    priors, leaf_value = self.policy_value_function(board)
    for step, probability in priors:
        node.expand(step, probability)
    return node, leaf_value
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Search, sample a move from the searched probabilities, and play it.

    :param board: Current board.
    :param is_output_action: Whether to output execution actions.
    :param running_output_function: running output function.
    :param is_stop: Ask whether to stop.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print(f"It's turn to {self.name}, AI player.")
        print("Thinking ASAP ...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    # Get actions and probabilities from the finished search.
    actions, probs = self.get_action_probs()
    size = BOARD.board_size
    # Encode each (row, col) action as a single flat index.
    flatten_actions = [row * size + col for row, col in actions]
    if self.is_training:
        # Mix in Dirichlet noise for exploration during training.
        noisy = 0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs)))
        flatten_action = np.random.choice(flatten_actions, p=noisy)
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # Decode the flat index back into board coordinates.
    action = (flatten_action // size, flatten_action % size)
    board.step(action)
    if self.is_output_analysis:
        # Scatter the sparse probabilities onto a full board grid.
        action_probs = np.zeros((size, size))
        for (row, col), prob in zip(actions, probs):
            action_probs[row, col] = prob
        self.output_analysis(action_probs)
    if is_output_action:
        print(f"AI player {self.name} moves ({action[0]}, {action[1]})")
    return action
def self_play(self, temp=1e-3):
    """
    Self-play: play one full game against itself and collect, for every
    position reached, the network input, the searched move probabilities,
    and the final reward from that mover's viewpoint.

    :param temp: Temperature parameter (Degree of exploration).
    :return: (board_inputs, all_action_probs, values) — parallel lists/array,
        one entry per move of the game.
    """
    board_inputs, all_action_probs, current_player = [], [], []
    board = Board()
    self.reset()
    while True:
        self.run(board, self.search_times)
        # Get actions and probabilities.
        actions, probs = self.get_action_probs(temp=temp)
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # actions, probs -> action_probs: scatter onto a full board grid.
        for action, prob in zip(actions, probs):
            action_probs[action[0], action[1]] = prob
        # Collect self play data (input planes, target policy, mover).
        board_inputs.append(self.board_to_xlabel(board))
        all_action_probs.append(action_probs)
        current_player.append(board.current_player)
        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
        # Add Dirichlet Noise for exploration in training.
        flatten_action = np.random.choice(
            flatten_actions,
            p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
        board.step(action)
        # Reset the root node — reuse the matching subtree when possible.
        if action in self.root.children:
            self.root = self.root.children[action]
            self.root.parent = None
        else:
            self.reset()
        is_over, winner = board.result()
        if is_over:
            # Draws leave every value at 0; otherwise +1 for the winner's
            # moves and -1 for the loser's.
            values = np.zeros(len(current_player))
            if winner != 0:
                values[np.array(current_player) == winner] = 1
                values[np.array(current_player) != winner] = -1
            return board_inputs, all_action_probs, values
def traverse(self, node: TreeNode, board: Board):
    """
    Descend to a leaf; if the game is not over there, expand the leaf
    with uniform priors over all available actions.

    :param node: Node to start the descent from.
    :param board: Board mutated along the chosen path.
    :return: <TreeNode> The reached (possibly just-expanded) node.
    """
    # Greedy descent until a childless node is reached.
    while node.children:
        move, node = node.choose_best_child(c=self.greedy_value)
        board.step(move)

    finished, _ = board.result()
    if finished:
        return node

    # Expand every legal move with the same prior probability.
    legal_moves = board.available_actions
    uniform = np.ones(len(legal_moves)) / len(legal_moves)
    for move, prior in zip(legal_moves, uniform):
        node.expand(move, prior)
    return node
def traverse(self, node: TreeNode, board: Board):
    """
    Descend to a leaf, then either score a terminal position or expand
    the leaf with the policy-value network.

    :param node: Current node.
    :param board: The board, mutated along the chosen path.
    :return: (<TreeNode>, value<int>) Expanded node and the value to be
        backpropagated.
    """
    # Walk down greedily until a node with no children.
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)

    # Game over?
    is_over, winner = board.result()
    if is_over:
        # Terminal value relative to the player to move.
        if winner == board.current_player:
            value = 1.0
        elif winner == -board.current_player:
            value = -1.0
        else:
            value = 0.0
        return node, value

    # Use the policy-value function for move priors and a position value.
    action_probs, value = self.policy_value_function(board)
    for action, probability in action_probs:
        node.expand(action, probability)
    return node, value
def rollout_policy(self, board: Board):
    """
    Rollout decision: play one uniformly random available move.

    :param board: The board, mutated by the random step.
    """
    board.step(random.choice(list(board.available_actions)))
def self_play(self, temp=1e-3):
    """
    Play one full game against itself and collect training data.

    :param temp: Temperature parameter (degree of exploration) forwarded
        to get_action_probs.
    :return: (board_inputs, all_action_probs, values) — one entry per move:
        network input planes, searched move-probability grids, and the
        +1/-1/0 outcome from each mover's point of view.
    """
    board_inputs, all_action_probs, current_player = [], [], []
    board = Board()
    self.reset()
    while True:
        self.run(board, self.search_times)
        actions, probs = self.get_action_probs(temp=temp)
        # Scatter the sparse search probabilities onto a full board grid.
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        for action, prob in zip(actions, probs):
            action_probs[action[0], action[1]] = prob
        # Record the training sample for this position.
        board_inputs.append(self.board_to_xlabel(board))
        all_action_probs.append(action_probs)
        current_player.append(board.current_player)
        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
        # Sample the move with Dirichlet noise mixed in for exploration.
        flatten_action = np.random.choice(flatten_actions,
                                          p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
        board.step(action)
        # Reuse the matching subtree as the new root when possible.
        if action in self.root.children:
            self.root = self.root.children[action]
            self.root.parent = None
        else:
            self.reset()
        is_over, winner = board.result()
        if is_over:
            # Draws leave all values at 0; winners' moves +1, losers' -1.
            values = np.zeros(len(current_player))
            if winner != 0:
                values[np.array(current_player) == winner] = 1
                values[np.array(current_player) != winner] = -1
            return board_inputs, all_action_probs, values
def take_action(self, board: Board, is_output_action=True):
    """
    It's turn to you: prompt until a legal move is entered, then play it.

    :param board: Current board.
    :param is_output_action: Accepted for interface parity with AI players;
        currently unused — prompts must always be shown. TODO confirm intent.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    print("该 {0} 落子了,它是人类选手。 It's turn to {0}, human player.".format(self.name))
    while True:
        # Input.
        input_str = input(
            "请输入 {0} 想要落子的坐标,格式为 \"[行],[列]\":\n"
            "Please input the coordinates {0} wants to move, "
            "the format is \"[Row],[Column]\":\n".format(self.name))
        # Validate.
        try:
            if input_str.isdigit():
                print("请输入完整坐标。\nPlease enter full coordinates.\n")
                continue
            action = [int(index) for index in input_str.split(",")]
        except ValueError:
            # Narrowed from a bare except: only int() can raise here, and a
            # bare except would also swallow KeyboardInterrupt.
            print("输入格式有误,请重新输入。\nThe input format is incorrect. Please try again.\n")
            continue
        # Execute.
        if not board.step(action):
            print("无法在此落子,请重新输入。\nCannot move here. Please try again.\n")
            continue
        print("人类选手 {0} 落子于 ({1}, {2})\nHuman player {0} moves ({1}, {2})\n".format(self.name, action[0], action[1]))
        break
    # Fixed: return the move so callers match the other Human implementations.
    return action
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Search, sample a move from the searched probabilities, and play it.

    :param board: Current board.
    :param is_output_action: Whether to output action information.
    :param running_output_function: running output function.
    :param is_stop: Ask whether to stop.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        # Fixed: message printed the player name twice
        # ("{0} It's turn to {0} ..."), a leftover of a stripped translation.
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    actions, probs = self.get_action_probs()
    # action -> flatten_action
    flatten_actions = []
    for one_action in actions:
        flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
    if self.is_training:
        # Add Dirichlet noise for exploration during training.
        flatten_action = np.random.choice(
            flatten_actions,
            p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # flatten_action -> action
    action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
    board.step(action)
    if self.is_output_analysis:
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # probs -> action_probs
        for one_action, one_prob in zip(actions, probs):
            action_probs[one_action[0], one_action[1]] = one_prob
        self.output_analysis(action_probs)
    if is_output_action:
        # Fixed: stray "IA {0} (...)" prefix removed for consistency with
        # the other English take_action implementations.
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def traverse(self, node: TreeNode, board: Board):
    """
    Descend to a leaf; if the game is not over there, expand the leaf
    with uniform priors over all available actions.

    :param node: Node to start the descent from.
    :param board: Board mutated along the chosen path.
    :return: <TreeNode> The reached (possibly just-expanded) node.
    """
    while node.children:
        move, node = node.choose_best_child(c=self.greedy_value)
        board.step(move)

    finished, _ = board.result()
    if finished:
        return node

    # Expand every legal move with the same prior probability.
    legal_moves = board.available_actions
    prior = np.ones(len(legal_moves)) / len(legal_moves)
    for move, p in zip(legal_moves, prior):
        node.expand(move, p)
    return node
def take_action(self, board: Board, is_output_action=True):
    """
    The AI player take action next step: search, sample a move from the
    searched probabilities, and play it.

    :param board: Current board.
    :param is_output_action: Whether to output execution actions.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(self.name))
        print("思考中。。。 Thinking...")
    self.reset()
    self.run(board, self.search_times)
    # Get actions and probabilities.
    actions, probs = self.get_action_probs()
    # action -> flatten_action
    flatten_actions = []
    for one_action in actions:
        flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
    if self.is_training:
        # Add Dirichlet noise for exploration in training.
        flatten_action = np.random.choice(
            flatten_actions,
            p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # flatten_action -> action
    action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
    board.step(action)
    if self.is_output_analysis:
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # probs -> action_probs
        for one_action, one_prob in zip(actions, probs):
            action_probs[one_action[0], one_action[1]] = one_prob
        self.output_analysis(action_probs)
    if is_output_action:
        print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    # Fixed: return the chosen move so callers can use it, matching the
    # other take_action implementations.
    return action
def take_action(self, board: Board):
    """
    The AI player take action next step: search, then play the best move.

    :param board: Current board.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(
        self.name))
    print("思考中。。。 Thinking...")
    # Search from a clean tree each turn.
    self.reset()
    self.run(board, self.search_times)
    # c=0: pure exploitation when committing to the move.
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    print(
        "AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".format(
            self.name, action[0], action[1]))
    # Fixed: return the chosen move, matching the other take_action
    # implementations in this project.
    return action
def traverse(self, node: TreeNode, board: Board):
    """
    Descend one best-child step at a time until the game ends or a leaf
    is reached; expand the leaf uniformly.

    :param node: Current node.
    :param board: Board mutated along the chosen path.
    :return: (<TreeNode>, <Board>) The reached node and the board.
    """
    while True:
        finished, _ = board.result()
        if finished:
            break
        if node.children:
            # Keep descending along the best child.
            move, node = node.choose_best_child(c=5.0)
            board.step(move)
        else:
            # Expand all child nodes with equal priors, then stop.
            moves = board.available_actions
            uniform = np.ones(len(moves)) / len(moves)
            for move, prior in zip(moves, uniform):
                node.expand(move, prior)
            break
    return node, board
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    It's turn to you: prompt until a legal move is entered, then play it.

    :param board: Current board.
    :param is_output_action: Whether to output action information
        (currently unused — prompts are always shown; TODO confirm intent).
    :param running_output_function: running output function (unused for humans).
    :param is_stop: Ask whether to stop (unused for humans).
    :return: <tuple (i, j)> Coordinate of the action.
    """
    print(" It's turn to {0}, human player.".format(self.name))
    while True:
        # Input.
        input_str = input(
            "Please input the coordinates {0} wants to move, "
            "the format is \"[Row],[Column]\":\n".format(self.name))
        # Validate.
        try:
            if input_str.isdigit():
                print("Please enter full coordinates.\n")
                continue
            action = [int(index) for index in input_str.split(",")]
        except ValueError:
            # Narrowed from a bare except: only int() can raise here, and a
            # bare except would also swallow KeyboardInterrupt.
            print("The input format is incorrect. Please try again.\n")
            continue
        # Execute.
        if not board.step(action):
            print("Cannot move here. Please try again.\n")
            continue
        print("Human player {0} moves ({1}, {2})\n".format(
            self.name, action[0], action[1]))
        break
    return action
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    It's turn to you: prompt until a legal move is entered, then play it.

    :param board: Current board.
    :param is_output_action: Accepted for interface parity; prompts always print.
    :param running_output_function: Unused for human players.
    :param is_stop: Unused for human players.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    # Fixed: the announce message printed the name twice and the prompt
    # contained a garbled "{0}}\"...\":" line (unbalanced escaped brace)
    # left over from a stripped translation; both normalized to the
    # English form used by the sibling Human implementation.
    print("It's turn to {0}, human player.".format(self.name))
    while True:
        input_str = input(
            "Please input the coordinates {0} wants to move, "
            "the format is \"[Row],[Column]\":\n".format(self.name))
        try:
            if input_str.isdigit():
                print("\nPlease enter full coordinates.\n")
                continue
            action = [int(index) for index in input_str.split(",")]
        except ValueError:
            # Narrowed from a bare except: only int() can raise here.
            print("\nThe input format is incorrect. Please try again.\n")
            continue
        if not board.step(action):
            print("\nCannot move here. Please try again.\n")
            continue
        # Fixed: stray leading "({1}, {2})\n" duplicate removed.
        print("Human player {0} moves ({1}, {2})\n".format(
            self.name, action[0], action[1]))
        break
    return action
def rollout_policy(self, board: Board):
    """
    Rollout decision: play one uniformly random available move on *board*.
    """
    options = list(board.available_actions)
    board.step(random.choice(options))