def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    The AI player takes its action for the next step.

    :param board: Current board.
    :param is_output_action: Whether to output action information.
    :param running_output_function: Callback for search-progress output.
    :param is_stop: Callable asked whether to stop the search early.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(
            self.name))
        print("思考中。。。 Thinking...")
    # Run a fresh search from the current position.
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".
              format(self.name, action[0], action[1]))
    return action
def _reset(self):
    """Re-initialize the whole game state for a fresh round."""
    K.clear_session()
    # Two AI instances; the second searches one level shallower (floored at 0).
    self.AI = [AI(self.cv, self.cg), AI(self.cv, max(self.cg - 1, 0))]
    self._library = Library()
    self._board = Board()
    self._player1 = Player("A")
    self._player2 = Player("B")
    self._players = [self._player1, self._player2]
    self._scores = [Score(), Score()]
    # 0 means player 1 starts first.
    self._turn_side = random.randint(0, 1)
    if self._is_user:
        print("Player First" if self._turn_side == 0 else "Player Second")
    self._library.shuffle()
    # Deal ten cards to each player, alternating draws.
    for _ in range(10):
        self._player1.draw(self._library.draw())
        self._player2.draw(self._library.draw())
    # Lay out eight cards on the board.
    for _ in range(8):
        self._board.put(self._library.draw())
    if self._is_user:
        print(self)
def rollout_policy(self, board: Board):
    """
    Rollout policy: pick one of the available actions uniformly at random
    and play it on the board.

    :param board: The board to advance by one move.
    """
    # Removed: dead datetime timing code (c1..c4 and the unused `time` tuple),
    # the unused `prob`, and a docstring describing a `node` parameter and a
    # return value this function never had.
    actions = list(board.available_actions)
    # All actions share the same probability, so a uniform index draw suffices.
    action = actions[np.random.choice(len(actions))]
    board.step(action)
def rollout_policy(self, board: Board):
    """
    Rollout decision function: play a uniformly random available action.

    :param board: The board.
    """
    candidates = list(board.available_actions)
    chosen = random.choice(candidates)
    board.step(chosen)
def traverse(self, node: TreeNode, board: Board):
    """
    Walk down the tree greedily until a leaf, then expand it.

    :param node: Node to start the descent from.
    :param board: The board, advanced in lockstep with the descent.
    :return: <TreeNode> The reached (and possibly expanded) leaf node.
    """
    # Descend: follow the best child until we hit a leaf or a finished game.
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)
        game_over, _ = board.result()
        if game_over:
            return node
    # Expand: give every legal action a uniform prior probability.
    legal_moves = board.available_actions
    uniform_prior = np.ones(len(legal_moves)) / len(legal_moves)
    for move, prior in zip(legal_moves, uniform_prior):
        node.expand(move, prior)
    return node
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Let the AI player decide and play its next move.

    :param board: Current board.
    :param is_output_action: Whether to print action information.
    :param running_output_function: Callback for search-progress output.
    :param is_stop: Callable asked whether to stop the search early.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking ASAP ...")
    # Fresh search from the current position.
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    # Pick the best child of the root (c=0) and play it.
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def traverse(self, node: TreeNode, board: Board):
    """
    Descend the tree to a leaf; if the game ended on the way, score it,
    otherwise expand the leaf with the policy-value function's priors.

    :param node: Node to start the descent from.
    :param board: The board, advanced in lockstep with the descent.
    :return: (<TreeNode>, value<float>) The leaf and the value to backpropagate.
    """
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)
        finished, winner = board.result()
        if finished:
            # Score relative to board.current_player.
            if winner == board.current_player:
                value = 1.0
            elif winner == -board.current_player:
                value = -1.0
            else:
                value = 0.0
            return node, value
    # Leaf reached with the game still running: expand with network priors.
    action_probs, value = self.policy_value_function(board)
    for move, prior in action_probs:
        node.expand(move, prior)
    return node, value
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Let the AI player decide and play its next move, sampling from the
    search's action probabilities (with Dirichlet noise during training).

    :param board: Current board.
    :param is_output_action: Whether to print action information.
    :param running_output_function: Callback for search-progress output.
    :param is_stop: Callable asked whether to stop the search early.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking ASAP ...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    # Search statistics -> (actions, probabilities).
    actions, probs = self.get_action_probs()
    # (row, col) -> flat index over the board.
    flatten_actions = [row * BOARD.board_size + col for row, col in actions]
    if self.is_training:
        # Mix in Dirichlet noise so training keeps exploring.
        noise = np.random.dirichlet(0.3 * np.ones(len(probs)))
        flatten_action = np.random.choice(flatten_actions, p=0.75 * probs + 0.25 * noise)
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # Flat index -> (row, col).
    action = divmod(flatten_action, BOARD.board_size)
    board.step(action)
    if self.is_output_analysis:
        analysis = np.zeros((BOARD.board_size, BOARD.board_size))
        for (row, col), one_prob in zip(actions, probs):
            analysis[row, col] = one_prob
        self.output_analysis(analysis)
    if is_output_action:
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def self_play(self, temp=1e-3):
    """
    Self-play one full game and collect training data.

    :param temp: Temperature parameter (degree of exploration).
    :return: (board_inputs, all_action_probs, values) — the network inputs for
             every visited position, the MCTS action probabilities for each,
             and the final reward (+1 win / -1 loss / 0 draw) from each
             position's player's point of view.
    """
    board_inputs, all_action_probs, current_player = [], [], []
    board = Board()
    self.reset()
    while True:
        self.run(board, self.search_times)
        # Get actions and probabilities.
        actions, probs = self.get_action_probs(temp=temp)
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # actions, probs -> action_probs grid.
        for action, prob in zip(actions, probs):
            action_probs[action[0], action[1]] = prob
        # Collect self-play data before the move is made.
        board_inputs.append(self.board_to_xlabel(board))
        all_action_probs.append(action_probs)
        current_player.append(board.current_player)
        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
        # Add Dirichlet noise for exploration in training.
        flatten_action = np.random.choice(
            flatten_actions,
            p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
        board.step(action)
        # Reuse the searched subtree under the played move; otherwise restart.
        if action in self.root.children:
            self.root = self.root.children[action]
            self.root.parent = None
        else:
            self.reset()
        is_over, winner = board.result()
        if is_over:
            # +1 where the eventual winner was to move, -1 elsewhere; all
            # zeros on a draw (winner == 0).
            values = np.zeros(len(current_player))
            if winner != 0:
                values[np.array(current_player) == winner] = 1
                values[np.array(current_player) != winner] = -1
            return board_inputs, all_action_probs, values
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Let the AI player decide and play its next move.

    :param board: Current board.
    :param is_output_action: Whether to print action information.
    :param running_output_function: Callback for search-progress output.
    :param is_stop: Callable asked whether to stop the search early.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        # Fixed: the old message ("{0} It's turn to {0} ...") printed the
        # player name twice; now matches the sibling implementations.
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    if is_output_action:
        # Fixed: dropped the stray "IA {0} ({1}, {2})" prefix line.
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def traverse(self, node: TreeNode, board: Board):
    """
    Expand node.

    :param node: Current node.
    :param board: The board.
    :return: (<TreeNode>, value<float>) Expanded node, and the value to be
             backpropagated.
    """
    while True:
        if len(node.children) == 0:
            break
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)
        # Game over?
        is_over, winner = board.result()
        if is_over:
            # Score relative to board.current_player.
            if winner == board.current_player:
                value = 1.0
            elif winner == -board.current_player:
                value = -1.0
            else:
                value = 0.0
            return node, value
    # Use the policy-value function to get the current action probabilities
    # and the evaluated position value.
    action_probs, value = self.policy_value_function(board)
    for action, probability in action_probs:
        _ = node.expand(action, probability)
    return node, value
def __init__(self, boardDimension):
    # Size of the (square) board; forwarded to Board below.
    self.boardDimension = boardDimension
    self.players = self.generateDefaultPlayers()
    self.items = self.generateDefaultItems()
    self.board = Board(self.boardDimension, self.players, self.items)
    # The first generated player starts.
    self.currentPlayer = self.players[0]
    # NOTE(review): moves are replayed from the hard-coded "test1" file —
    # confirm this fixture path is intentional outside of tests.
    self.moves = self.importMoveListFromFile("test1")
    self.finalState = self.generateFinalState()
def __init__(self):
    # Enumerate all possible 3-in-a-row positions (cells indexed 0-8,
    # row-major): the two diagonals, then rows, then columns.
    self.win_positions = [[0, 4, 8], [2, 4, 6],
                          [0, 1, 2], [3, 4, 5], [6, 7, 8],
                          [0, 3, 6], [1, 4, 7], [2, 5, 8]]
    self.board = Board()
    self.human = Player()
    # Player(True) — presumably the flag marks the bot side; verify against
    # Player's constructor.
    self.bot = Player(True)
    # Tracks whether the first move is still pending.
    self.opening_move = True
def rollout_policy(self, board: Board): """ 决策函数,在这里随机决策。 Decision function, random decision here. :param board: 棋盘。 The board. """ # 随机执行动作。 Randomly execute actions. action = random.choice(list(board.available_actions)) # 执行。 Action. board.step(action)
def self_play(self, temp=1e-3):
    """
    Self-play one full game and collect training data.

    :param temp: Temperature parameter (degree of exploration).
    :return: (board_inputs, all_action_probs, values) — the network inputs for
             every visited position, the MCTS action probabilities for each,
             and the final reward (+1 win / -1 loss / 0 draw) per position.
    """
    board_inputs, all_action_probs, current_player = [], [], []
    board = Board()
    self.reset()
    while True:
        self.run(board, self.search_times)
        actions, probs = self.get_action_probs(temp=temp)
        # actions, probs -> action_probs grid.
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        for action, prob in zip(actions, probs):
            action_probs[action[0], action[1]] = prob
        # Collect self-play data before the move is made.
        board_inputs.append(self.board_to_xlabel(board))
        all_action_probs.append(action_probs)
        current_player.append(board.current_player)
        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
        # Dirichlet noise keeps self-play exploring.
        flatten_action = np.random.choice(flatten_actions,
                                          p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
        board.step(action)
        # Reuse the searched subtree under the played move; otherwise restart.
        if action in self.root.children:
            self.root = self.root.children[action]
            self.root.parent = None
        else:
            self.reset()
        is_over, winner = board.result()
        if is_over:
            # +1 where the eventual winner was to move, -1 elsewhere;
            # all zeros on a draw (winner == 0).
            values = np.zeros(len(current_player))
            if winner != 0:
                values[np.array(current_player) == winner] = 1
                values[np.array(current_player) != winner] = -1
            return board_inputs, all_action_probs, values
def take_action(self, board: Board, is_output_action=True):
    """
    It's the human player's turn: prompt for a coordinate until a legal
    move is entered, then play it.

    :param board: Current board.
    :param is_output_action: Unused here (kept for interface parity with the
                             AI players); output is always printed.
    :return: <list [i, j]> Coordinate of the action (new; previously None).
    """
    print("该 {0} 落子了,它是人类选手。 It's turn to {0}, human player.".format(self.name))
    while True:
        # Input.
        input_str = input(
            "请输入 {0} 想要落子的坐标,格式为 \"[行],[列]\":\n"
            "Please input the coordinates {0} wants to move, "
            "the format is \"[Row],[Column]\":\n".format(self.name))
        # Validate. Fixed: only catch the parse error — the old bare `except`
        # also swallowed KeyboardInterrupt and real bugs.
        try:
            if input_str.isdigit():
                print("请输入完整坐标。\nPlease enter full coordinates.\n")
                continue
            action = [int(index) for index in input_str.split(",")]
        except ValueError:
            print("输入格式有误,请重新输入。\nThe input format is incorrect. Please try again.\n")
            continue
        # Execute.
        if not board.step(action):
            print("无法在此落子,请重新输入。\nCannot move here. Please try again.\n")
            continue
        print("人类选手 {0} 落子于 ({1}, {2})\nHuman player {0} moves ({1}, {2})\n".format(self.name, action[0], action[1]))
        return action
def rollout(self, board: Board):
    """
    Play the position out to the end using the rollout policy.

    :param board: The board.
    :return: <int> The winner.
    """
    finished, winner = board.result()
    while not finished:
        self.rollout_policy(board)
        finished, winner = board.result()
    return winner
def main():
    """Train two DQN agents against each other via self-play, periodically
    benchmarking against a random player and saving the shared model."""
    # 920 games for epsilon to decay to its minimum.
    numberOfGames = 1000000
    # Use a tqdm progress bar when available. Fixed: catch only ImportError —
    # the old bare `except` would also hide unrelated errors.
    try:
        from tqdm import trange
        iterable = trange(numberOfGames)
    except ImportError:
        iterable = range(numberOfGames)
    outputDir = 'DQNAgent/model_output/'
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    # Both agents share one model (self-play).
    model = Model()
    agent1 = DQNAgent(model)
    agent2 = DQNAgent(model)
    for gameNumber in iterable:
        game = DQNGame(Board(7, 7), agent1, agent2)
        moves, boards, winningSide = game.play()
        # Extract (state, move, nextState, done) transitions.
        memory = getStateActionNextStateReward(boards, moves, winningSide)
        model.rememeber(memory)
        if (len(model.memory) > model.batchSize):
            model.replay()
        if gameNumber % 50 == 0:
            print("weights_" + '{:04d}:'.format(gameNumber), end="")
            benchmarkAgainstRandomPlayer(model)
            print()
            model.save(outputDir + "weights_" + '{:04d}'.format(gameNumber) + ".hdf5")
def __init__(self, board=None, food=None, moves_threshold=1000):
    # Board: default-construct when absent; copy-construct when a Board is
    # passed so this instance does not alias the caller's board.
    if board is None:
        board = Board()
    elif isinstance(board, Board):
        board = Board(board)
    self.board = board
    # Food (semantics defined by the game elsewhere — not visible here).
    self.food = food
    # Moves threshold — presumably an upper bound on moves before cut-off;
    # verify against the game loop.
    self.moves_threshold = moves_threshold
    # Initialize position via a zero move — TODO confirm move()'s contract.
    self.move([0, 0])
def processMoves(self):
    """
    Replay the recorded move list on a freshly created board.

    Expects the list to open with "GAME-START"; a "GAME-END" entry prints
    the death summary, every other entry is forwarded to readMove().
    """
    self.board = Board(self.boardDimension, self.players, self.items)
    pending = deque(self.moves)
    header = pending.popleft()
    if header != "GAME-START":
        # TODO Handle invalid entry error (currently only logged; replay continues).
        print("Invalid moves file")
    while pending:
        entry = pending.popleft()
        if entry == "GAME-END":
            print("GAME-END... Game has finished")
            print(self.board.toStringDeaths(self.players))
        else:
            self.readMove(entry)
def play_web_game(is_stop, player1: Player, player2: Player, turn_to, send_board_step,
                  send_player1_running, send_player2_running, wait_human_action, game_over):
    """
    Drive one web game between player1 (side BOARD.o) and player2 until it
    ends or is_stop() turns true.

    :param is_stop: Callable polled to abort the game early.
    :param player1: Player on side BOARD.o.
    :param player2: Player on the other side.
    :param turn_to: Callback notified whose turn it is.
    :param send_board_step: Callback (player_no, action) pushing AI moves to the client.
    :param send_player1_running: Progress callback for player1's search.
    :param send_player2_running: Progress callback for player2's search.
    :param wait_human_action: Callable (player_no, is_stop) blocking for a human move.
    :param game_over: Callback receiving the winner when the game ends.
    """
    board = Board()
    while not is_stop():
        turn_to(board.current_player)
        if board.current_player == BOARD.o:
            if isinstance(player1, Human):
                # Human moves come from the client; abort if stopped while waiting.
                action = wait_human_action(1, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                action = player1.take_action(board, is_output_action=False,
                                             running_output_function=send_player1_running,
                                             is_stop=is_stop)
                send_board_step(1, action)
        else:
            if isinstance(player2, Human):
                action = wait_human_action(2, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                action = player2.take_action(board, is_output_action=False,
                                             running_output_function=send_player2_running,
                                             is_stop=is_stop)
                send_board_step(2, action)
        is_over, winner = board.result()
        if is_over:
            game_over(winner)
            return
def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
    """
    Let the AI player decide and play its next move, sampling from the
    search's action probabilities (with Dirichlet noise during training).

    :param board: Current board.
    :param is_output_action: Whether to print action information.
    :param running_output_function: Callback for search-progress output.
    :param is_stop: Callable asked whether to stop the search early.
    :return: <tuple (i, j)> Coordinate of the action.
    """
    if is_output_action:
        # Fixed: the old message ("{0} It's turn to {0} ...") printed the
        # player name twice; now matches the sibling implementations.
        print("It's turn to {0}, AI player.".format(self.name))
        print("Thinking...")
    self.reset()
    self.run(board, self.search_times, running_output_function, is_stop=is_stop)
    actions, probs = self.get_action_probs()
    # action -> flatten_action
    flatten_actions = []
    for one_action in actions:
        flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
    if self.is_training:
        # Add Dirichlet noise for exploration during training.
        flatten_action = np.random.choice(flatten_actions,
                                          p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # flatten_action -> action
    action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
    board.step(action)
    if self.is_output_analysis:
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # probs -> action_probs grid.
        for one_action, one_prob in zip(actions, probs):
            action_probs[one_action[0], one_action[1]] = one_prob
        self.output_analysis(action_probs)
    if is_output_action:
        # Fixed: dropped the stray "IA {0} ({1}, {2})" prefix line.
        print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    return action
def start_until_game_over(player1: Player, player2: Player, board_renderer: BoardRenderer = None):
    """
    Player player1 and player2 play on the board until the game is over,
    and output the winner.

    :param player1: Player 1.
    :param player2: Player 2.
    :param board_renderer: The board renderer.
    :return: <int> The winner returned by board.
    """
    board = Board()
    while True:
        # Render.
        if board_renderer is not None:
            board.render(board_renderer)
        # Take action; console output only when a renderer is attached.
        if board.current_player == BOARD.o:
            player1.take_action(board, is_output_action=board_renderer is not None)
        else:
            player2.take_action(board, is_output_action=board_renderer is not None)
        # Game over?
        is_over, winner = board.result()
        if is_over:
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
def start_until_game_over(player1: Player, player2: Player, board_renderer: BoardRenderer = None):
    """
    Run a full game between player1 and player2 and return the winner.

    :param player1: Player 1 (moves when it is BOARD.o's turn).
    :param player2: Player 2.
    :param board_renderer: Optional renderer; when given, the board is drawn
                           before every move and once more at the end.
    :return: <int> The winner returned by board.
    """
    board = Board()
    rendering = board_renderer is not None
    while True:
        if rendering:
            board.render(board_renderer)
        current = player1 if board.current_player == BOARD.o else player2
        current.take_action(board, is_output_action=rendering)
        is_over, winner = board.result()
        if is_over:
            if rendering:
                board.render(board_renderer)
            return winner
def traverse(self, node: TreeNode, board: Board):
    """
    Greedy descent to a leaf node, then uniform expansion of that leaf.

    :param node: Node to start from.
    :param board: Board advanced alongside the descent.
    :return: <TreeNode> The reached leaf node.
    """
    # Follow the best-valued child for as long as the node has children.
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)
        done, _ = board.result()
        if done:
            return node
    # Expand every legal move with an equal prior.
    moves = board.available_actions
    priors = np.ones(len(moves)) / len(moves)
    for move, prior in zip(moves, priors):
        node.expand(move, prior)
    return node
def handleEvents(self, events):
    """Handle game-over-screen input: quit on QUIT/ESC/F4, restart on F1."""
    super(GameOverScene, self).handleEvents(events)
    for event in events:
        if event.type == pygame.QUIT:
            exit()
        elif event.type == pygame.KEYDOWN:
            if event.key in (pygame.K_ESCAPE, pygame.K_F4):
                exit()
            if event.key == pygame.K_F1:
                # Start a fresh board and go back to the menu.
                Board.newboard(self)
                self.getGame().changeScene(GameConstants.MENU_SCENE)
def take_action(self, board: Board, is_output_action=True):
    """
    The AI player takes its action for the next step.

    :param board: Current board.
    :param is_output_action: Whether to output execution actions.
    """
    if is_output_action:
        print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(self.name))
        print("思考中。。。 Thinking...")
    self.reset()
    self.run(board, self.search_times)
    # Get actions and probabilities.
    actions, probs = self.get_action_probs()
    # action -> flatten_action
    flatten_actions = []
    for one_action in actions:
        flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])
    if self.is_training:
        # Add Dirichlet noise for exploration in training.
        flatten_action = np.random.choice(flatten_actions,
                                          p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
    else:
        flatten_action = np.random.choice(flatten_actions, p=probs)
    # flatten_action -> action
    action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)
    board.step(action)
    if self.is_output_analysis:
        action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
        # probs -> action_probs grid.
        for one_action, one_prob in zip(actions, probs):
            action_probs[one_action[0], one_action[1]] = one_prob
        self.output_analysis(action_probs)
    if is_output_action:
        print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
def take_action(self, board: Board):
    """
    Let the AI player decide and play its next move.

    :param board: Current board.
    """
    print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(self.name))
    print("思考中。。。 Thinking...")
    # Run a fresh search from the current position.
    self.reset()
    self.run(board, self.search_times)
    # Play the best child of the root (c=0).
    action, _ = self.root.choose_best_child(0)
    board.step(action)
    if self.is_output_analysis:
        self.output_analysis()
    print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
def rollout(self, board: Board):
    """
    Simulation: play the position out with the rollout policy until the
    game ends.

    :param board: The board.
    :return: <int> The winner.
    """
    finished, winner = board.result()
    while not finished:
        # Let the rollout policy pick the next move.
        self.rollout_policy(board)
        finished, winner = board.result()
    return winner
def traverse(self, node: TreeNode, board: Board):
    """
    Expand node.

    :param node: Current node.
    :param board: The board.
    :return: (<TreeNode>, <Board>) Expanded node and the advanced board.
    """
    while True:
        is_over, _ = board.result()
        if is_over:
            break
        if len(node.children) != 0:
            # NOTE(review): exploration constant is hard-coded to 5.0 here,
            # unlike sibling traverse() implementations that use
            # self.greedy_value — confirm this is intentional.
            action, node = node.choose_best_child(c=5.0)
            board.step(action)
        else:
            actions = board.available_actions
            probs = np.ones(len(actions)) / len(actions)
            # Expand all child nodes with a uniform prior.
            for action, prob in zip(actions, probs):
                _ = node.expand(action, prob)
            break
    return node, board