Example #1
    def take_action(self,
                    board: Board,
                    is_output_action=True,
                    running_output_function=None,
                    is_stop=None):
        """
        下一步 AI 玩家执行动作。
        The AI player take action next step.
        :param board: 当前棋盘。 Current board.
        :param is_output_action: 是否输出 action 信息。 Whether to output action information.
        :param running_output_function: 输出 running 的函数。 running output function.
        :param is_stop: 询问是否停止。 Ask whether to stop.
        :return: <tuple (i, j)> 采取行动时,落子的坐标。 Coordinate of the action.
        """
        if is_output_action:
            print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(
                self.name))
            print("思考中。。。 Thinking...")

        self.reset()
        self.run(board,
                 self.search_times,
                 running_output_function,
                 is_stop=is_stop)
        action, _ = self.root.choose_best_child(0)
        board.step(action)

        if self.is_output_analysis:
            self.output_analysis()

        if is_output_action:
            print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".
                  format(self.name, action[0], action[1]))

        return action
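The take_action and traverse examples in this collection all lean on TreeNode.choose_best_child and TreeNode.expand, which are never shown. Below is a minimal sketch of what those two methods might look like, assuming a plain UCT node; the attribute names visit_times and value_sum are assumptions, not the original API.

import math


class TreeNode:
    """Minimal UCT node sketch; attribute names are assumptions."""

    def __init__(self, parent=None, prior=1.0):
        self.parent = parent
        self.prior = prior          # prior probability passed to expand()
        self.children = {}          # action -> TreeNode
        self.visit_times = 0
        self.value_sum = 0.0

    def expand(self, action, prior):
        # Create (or reuse) the child reached by `action`.
        if action not in self.children:
            self.children[action] = TreeNode(parent=self, prior=prior)
        return self.children[action]

    def ucb(self, c):
        # Mean value plus an exploration bonus; c=0 is pure exploitation,
        # matching the choose_best_child(0) call used to pick the final move.
        if self.visit_times == 0:
            return float("inf")
        exploit = self.value_sum / self.visit_times
        if c == 0 or self.parent is None or self.parent.visit_times == 0:
            return exploit
        return exploit + c * math.sqrt(math.log(self.parent.visit_times) / self.visit_times)

    def choose_best_child(self, c):
        # Returns the (action, child) pair with the highest UCB score.
        return max(self.children.items(), key=lambda item: item[1].ucb(c))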
Example #2
    def _reset(self):
        K.clear_session()
        self.AI = [AI(self.cv, self.cg), AI(self.cv, max(self.cg - 1, 0))]
        self._library = Library()
        self._board = Board()
        self._player1 = Player("A")
        self._player2 = Player("B")
        self._players = [self._player1, self._player2]
        self._scores = [Score(), Score()]

        self._turn_side = random.randint(0, 1)  # if 0, player 1 starts first
        if self._is_user:
            if self._turn_side == 0:
                print("Player First")
            else:
                print("Player Second")
        self._library.shuffle()
        for i in range(10):
            self._player1.draw(self._library.draw())
            self._player2.draw(self._library.draw())
        for i in range(8):
            self._board.put(self._library.draw())

        if self._is_user:
            print(self)
    def rollout_policy(self, board: Board):
        """
        决策函数,选择子节点的概率决策。
        Policy function, a probabilistic decision to select child nodes.
        :param node: 当前节点。 Current node.
        :param board:
        :return: <TreeNode> 决策出的节点。 The decision node.
        """

        c1 = datetime.datetime.now()
        # All actions have the same probability.
        actions = list(board.available_actions)
        probs = np.ones(len(actions)) / len(actions)

        c2 = datetime.datetime.now()
        # Get an action and its probability.
        action_index = np.random.choice(range(len(actions)))
        action = actions[action_index]
        prob = probs[action_index]

        c3 = datetime.datetime.now()
        # Apply the action.
        board.step(action)

        c4 = datetime.datetime.now()
        time = ((c2 - c1).microseconds, (c3 - c2).microseconds,
                (c4 - c3).microseconds)
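One caveat on the timing code above: timedelta.microseconds is only the microsecond component of the difference, so it silently wraps for any phase longer than one second. If the per-phase timings matter, a monotonic clock is safer; here is a sketch of the same measurement with time.perf_counter (the helper name timed_rollout_step is mine, not from the original code).

import random
import time


def timed_rollout_step(board):
    """Random rollout step with per-phase timings in seconds (sketch, not the original API)."""
    t0 = time.perf_counter()
    actions = list(board.available_actions)    # enumerate legal moves

    t1 = time.perf_counter()
    action = random.choice(actions)            # uniform random choice

    t2 = time.perf_counter()
    board.step(action)                         # apply the move

    t3 = time.perf_counter()
    return t1 - t0, t2 - t1, t3 - t2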
Example #4
 def rollout_policy(self, board: Board):
     """
     Decision function, random decision here.
     :param board: The board.
     """
     # Randomly execute actions.
     board.step(random.choice(list(board.available_actions)))
Example #5
    def traverse(self, node: TreeNode, board: Board):
        """
        Expand node.
        :param node: Current node.
        :param board: The board.
        :return: <TreeNode> Expanded nodes.
        """
        while True:
            if len(node.children) == 0:
                break
            action, node = node.choose_best_child(c=self.greedy_value)
            board.step(action)

        is_over, _ = board.result()
        if is_over:
            return node

        # Expand all child nodes.
        actions = board.available_actions
        probs = np.ones(len(actions)) / len(actions)

        for action, prob in zip(actions, probs):
            _ = node.expand(action, prob)

        return node
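Every player in these examples assumes the same small Board surface: available_actions, step, result, and current_player. A throwaway 3x3 stub with that surface is enough to wire-test traverse and rollout; this is a sketch, not the project's real Board.

import numpy as np


class StubBoard:
    """Tiny 3x3 stand-in exposing the interface the search code expects (sketch)."""

    def __init__(self, size=3):
        self.size = size
        self.grid = np.zeros((size, size), dtype=int)
        self.current_player = 1                 # players are 1 and -1

    @property
    def available_actions(self):
        return [tuple(ij) for ij in np.argwhere(self.grid == 0)]

    def step(self, action):
        i, j = action
        if self.grid[i, j] != 0:
            return False                        # illegal move, mirrors Board.step returning False
        self.grid[i, j] = self.current_player
        self.current_player = -self.current_player
        return True

    def result(self):
        lines = list(self.grid) + list(self.grid.T)
        lines += [self.grid.diagonal(), np.fliplr(self.grid).diagonal()]
        for line in lines:
            total = int(np.sum(line))
            if abs(total) == self.size:
                return True, int(np.sign(total))   # a full line: that player wins
        if not (self.grid == 0).any():
            return True, 0                         # draw
        return False, 0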
Example #6
    def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
        """
        The AI player take action next step.
        :param board: Current board.
        :param is_output_action: Whether to output action information.
        :param running_output_function: running output function.
        :param is_stop: Ask whether to stop.
        :return: <tuple (i, j)> Coordinate of the action.
        """
        if is_output_action:
            print("It's turn to {0}, AI player.".format(self.name))
            print("Thinking ASAP ...")

        self.reset()
        self.run(board, self.search_times, running_output_function, is_stop=is_stop)
        action, _ = self.root.choose_best_child(0)
        board.step(action)

        if self.is_output_analysis:
            self.output_analysis()

        if is_output_action:
            print("AI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))

        return action
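self.run(board, self.search_times, ...) is called in every take_action but never shown. Below is a hedged sketch of one plausible shape for the pure-MCTS variant, combining traverse (Example #5) and rollout (Example #17); the backpropagate helper and the way the progress callback is invoked are my assumptions, not the original code.

import copy


def run(self, board, search_times, running_output_function=None, is_stop=None):
    """One plausible search loop (sketch); the project's real run() may differ."""
    for i in range(search_times):
        if is_stop is not None and is_stop():
            break
        if running_output_function is not None:
            # Progress callback; the real call signature may differ.
            running_output_function("{0}/{1}".format(i + 1, search_times))

        simulation_board = copy.deepcopy(board)            # never mutate the live game board
        node = self.traverse(self.root, simulation_board)  # selection + expansion
        winner = self.rollout(simulation_board)            # random playout to the end

        # Assumed helper: credit every node on the path back to the root,
        # from the perspective of the player who moved into it.
        self.backpropagate(node, winner)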
Example #7
    def traverse(self, node: TreeNode, board: Board):
        """

        :param node:
        :param board:
        :return:
        """
        while True:
            if len(node.children) == 0:
                break
            action, node = node.choose_best_child(c=self.greedy_value)
            board.step(action)

        is_over, winner = board.result()
        if is_over:
            if winner == board.current_player:
                value = 1.0
            elif winner == -board.current_player:
                value = -1.0
            else:
                value = 0.0
            return node, value

        action_probs, value = self.policy_value_function(board)

        for action, probability in action_probs:
            _ = node.expand(action, probability)

        return node, value
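This AlphaZero-style traverse expects self.policy_value_function(board) to return an iterable of (action, probability) pairs together with a scalar value for the side to move. A uniform placeholder with that contract is useful for wiring the tree search before a real network is plugged in; the interface is inferred from how the return values are used above, so treat it as an assumption.

import numpy as np


def uniform_policy_value_function(board):
    """Placeholder policy-value function: uniform priors, neutral value (sketch)."""
    actions = list(board.available_actions)
    priors = np.ones(len(actions)) / len(actions)
    action_probs = zip(actions, priors)   # iterable of (action, prior) pairs
    value = 0.0                           # evaluation from the current player's view
    return action_probs, value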
    def take_action(self,
                    board: Board,
                    is_output_action=True,
                    running_output_function=None,
                    is_stop=None):
        """
        The AI player take action next step.
        :param board: Current board.
        :param is_output_action:  Whether to output execution actions.
        :param running_output_function:  running output function.
        :param is_stop: Ask whether to stop.
        :return: <tuple (i, j)>  Coordinate of the action.
        """
        if is_output_action:
            print("It's turn to {0}, AI player.".format(self.name))
            print("Thinking ASAP ...")

        self.reset()
        self.run(board,
                 self.search_times,
                 running_output_function,
                 is_stop=is_stop)

        # Get actions and probabilities.
        actions, probs = self.get_action_probs()

        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size +
                                   one_action[1])

        if self.is_training:
            # Add Dirichlet noise for exploration during training.
            flatten_action = np.random.choice(
                flatten_actions,
                p=0.75 * probs +
                0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        else:
            flatten_action = np.random.choice(flatten_actions, p=probs)

        # flatten_action -> action
        action = (flatten_action // BOARD.board_size,
                  flatten_action % BOARD.board_size)

        board.step(action)

        if self.is_output_analysis:
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
            # probs -> action_probs
            for one_action, one_prob in zip(actions, probs):
                action_probs[one_action[0], one_action[1]] = one_prob

            self.output_analysis(action_probs)

        if is_output_action:
            print("AI player {0} moves ({1}, {2})".format(
                self.name, action[0], action[1]))

        return action
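get_action_probs() is also not shown. In AlphaZero-style players it normally turns the root's visit counts into a distribution sharpened by the temperature; the sketch below rests on that assumption (the visit_times attribute name is mine), and works in log space so the temp=1e-3 default used in self_play does not overflow.

import numpy as np


def get_action_probs(self, temp=1e-3):
    """Visit-count distribution over the root's children, sharpened by 1/temp (sketch)."""
    actions = list(self.root.children.keys())
    visits = np.array([child.visit_times for child in self.root.children.values()],
                      dtype=np.float64)

    log_visits = np.log(np.maximum(visits, 1e-10)) / temp
    log_visits -= log_visits.max()          # stabilize before exponentiating
    probs = np.exp(log_visits)
    probs /= probs.sum()
    return actions, probs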
    def self_play(self, temp=1e-3):
        """
        Self-play, return to all boards after the game,
        the probability of losing all positions,
        and reward of victory or lose.
        :param temp: Temperature parameter (Degree of exploration).
        :return: [(boards, all_action_probs, values)]
        """
        board_inputs, all_action_probs, current_player = [], [], []
        board = Board()
        self.reset()

        while True:
            self.run(board, self.search_times)

            # Get actions and probabilities.
            actions, probs = self.get_action_probs(temp=temp)
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))

            # actions, probs -> action_probs
            for action, prob in zip(actions, probs):
                action_probs[action[0], action[1]] = prob

            # Collect self play data.
            board_inputs.append(self.board_to_xlabel(board))
            all_action_probs.append(action_probs)
            current_player.append(board.current_player)

            # action -> flatten_action
            flatten_actions = []
            for one_action in actions:
                flatten_actions.append(one_action[0] * BOARD.board_size +
                                       one_action[1])

            # Add Dirichlet Noise for exploration in training.
            flatten_action = np.random.choice(
                flatten_actions,
                p=0.75 * probs +
                0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))

            # flatten_action -> action
            action = (flatten_action // BOARD.board_size,
                      flatten_action % BOARD.board_size)

            board.step(action)

            # Reset the root node.
            if action in self.root.children:
                self.root = self.root.children[action]
                self.root.parent = None
            else:
                self.reset()

            is_over, winner = board.result()
            if is_over:
                values = np.zeros(len(current_player))
                if winner != 0:
                    values[np.array(current_player) == winner] = 1
                    values[np.array(current_player) != winner] = -1
                return board_inputs, all_action_probs, values
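The action -> flatten_action -> action round trip above exists because np.random.choice only samples from 1-D arrays. Pulled out as helpers (the names are mine), the conversion is easy to sanity-check.

def flatten(action, board_size):
    """(row, col) -> single index."""
    return action[0] * board_size + action[1]


def unflatten(flat_action, board_size):
    """single index -> (row, col)."""
    return flat_action // board_size, flat_action % board_size


# Round trip on a 15x15 board.
assert unflatten(flatten((3, 7), 15), 15) == (3, 7)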
Example #10
    def take_action(self,
                    board: Board,
                    is_output_action=True,
                    running_output_function=None,
                    is_stop=None):
        if is_output_action:
            print("{0} It's turn to {0}, AI player.".format(self.name))
            print("Thinking...")

        self.reset()
        self.run(board,
                 self.search_times,
                 running_output_function,
                 is_stop=is_stop)
        action, _ = self.root.choose_best_child(0)
        board.step(action)

        if self.is_output_analysis:
            self.output_analysis()

        if is_output_action:
            print("IA {0} ({1}, {2})\nAI player {0} moves ({1}, {2})".format(
                self.name, action[0], action[1]))

        return action
Example #11
    def traverse(self, node: TreeNode, board: Board):
        """
        扩展子节点。
        Expand node.
        :param node: 当前节点。 Current node.
        :param board: 棋盘。 The board.
        :return: (<TreeNode>, value<int>) 扩展出的节点和需要反向传输的 value。
        Expanded nodes, and the value to be backpropagated.
        """
        while True:
            if len(node.children) == 0:
                break
            action, node = node.choose_best_child(c=self.greedy_value)
            board.step(action)

        # Game over?
        is_over, winner = board.result()
        if is_over:
            if winner == board.current_player:
                value = 1.0
            elif winner == -board.current_player:
                value = -1.0
            else:
                value = 0.0
            return node, value

        # Use the policy-value function to get the action probabilities
        # and an evaluation of the current position.
        action_probs, value = self.policy_value_function(board)

        for action, probability in action_probs:
            _ = node.expand(action, probability)

        return node, value
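What happens to the returned (node, value) pair is not shown in these excerpts. In this style of search the value is usually pushed back up the path with its sign flipped at each level, because parent and child positions belong to opposing players; here is a sketch under that assumption (visit_times and value_sum are assumed attribute names).

def backpropagate(node, value):
    """Propagate a leaf evaluation back to the root, alternating perspective (sketch)."""
    while node is not None:
        node.visit_times += 1
        node.value_sum += value
        value = -value            # the parent sees the position from the other side
        node = node.parent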
Example #12
 def __init__(self, boardDimension):
     self.boardDimension = boardDimension
     self.players = self.generateDefaultPlayers()
     self.items = self.generateDefaultItems()
     self.board = Board(self.boardDimension, self.players, self.items)
     self.currentPlayer = self.players[0]
     self.moves = self.importMoveListFromFile("test1")
     self.finalState = self.generateFinalState()
Example #13
 def __init__(self):
     # Enumerate all possible 3 in a row positions
     self.win_positions = [[0, 4, 8], [2, 4, 6], [0, 1, 2], [3, 4, 5],
                           [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8]]
     self.board = Board()
     self.human = Player()
     self.bot = Player(True)
     self.opening_move = True
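With win_positions enumerated like that, the winner check reduces to comparing three cells per line. The sketch below assumes the board is stored as a flat list of nine marks ('X', 'O', or None), which is an assumption about this project's Board internals.

WIN_POSITIONS = [[0, 4, 8], [2, 4, 6], [0, 1, 2], [3, 4, 5],
                 [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8]]


def find_winner(cells):
    """Return the winning mark, or None if no line is complete (sketch)."""
    for a, b, c in WIN_POSITIONS:
        if cells[a] is not None and cells[a] == cells[b] == cells[c]:
            return cells[a]
    return None


# 'X' has completed the first column (indices 0, 3, 6).
assert find_winner(["X", "O", None, "X", "O", None, "X", None, None]) == "X"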
Example #14
    def rollout_policy(self, board: Board):
        """
        决策函数,在这里随机决策。
        Decision function, random decision here.
        :param board: 棋盘。 The board.
        """

        # Pick a random action.
        action = random.choice(list(board.available_actions))

        # Apply it to the board.
        board.step(action)
Example #15
    def self_play(self, temp=1e-3):
        """
        :param temp:
        :return:
        """
        board_inputs, all_action_probs, current_player = [], [], []
        board = Board()
        self.reset()

        while True:
            self.run(board, self.search_times)

            actions, probs = self.get_action_probs(temp=temp)
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))

            for action, prob in zip(actions, probs):
                action_probs[action[0], action[1]] = prob

            board_inputs.append(self.board_to_xlabel(board))
            all_action_probs.append(action_probs)
            current_player.append(board.current_player)

            # action -> flatten_action
            flatten_actions = []
            for one_action in actions:
                flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])

            flatten_action = np.random.choice(flatten_actions,
                                              p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))

            # flatten_action -> action
            action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)

            board.step(action)

            if action in self.root.children:
                self.root = self.root.children[action]
                self.root.parent = None
            else:
                self.reset()

            is_over, winner = board.result()
            if is_over:
                values = np.zeros(len(current_player))
                if winner != 0:
                    values[np.array(current_player) == winner] = 1
                    values[np.array(current_player) != winner] = -1
                return board_inputs, all_action_probs, values
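The 0.75 * probs + 0.25 * dirichlet(...) mixture appears in both self_play and the training-mode take_action. Since both terms are probability vectors, the convex combination is itself a valid distribution; a standalone helper makes that easy to check (the helper name and the epsilon/alpha parameter names are mine).

import numpy as np


def add_dirichlet_noise(probs, epsilon=0.25, alpha=0.3):
    """Mix search probabilities with Dirichlet noise for exploration (sketch)."""
    probs = np.asarray(probs, dtype=np.float64)
    noise = np.random.dirichlet(alpha * np.ones(len(probs)))
    return (1.0 - epsilon) * probs + epsilon * noise


mixed = add_dirichlet_noise([0.5, 0.3, 0.2])
assert np.isclose(mixed.sum(), 1.0) and (mixed >= 0).all()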
Example #16
    def take_action(self, board: Board, is_output_action=True):
        """
        电脑前的玩家应该采取动作了。 It's turn to you.
        :param board: 当前局面。 Current board.
        :param is_output_action:
        :return: <tuple (i, j)> 采取行动时,落子的坐标。 Coordinate of the action.
        """
        print("该 {0} 落子了,它是人类选手。 It's turn to {0}, human player.".format(self.name))
        while True:
            # Input.
            input_str = input(
                "Please input the coordinates {0} wants to move to, "
                "in the format \"[Row],[Column]\":\n".format(self.name))

            # Validate.
            try:
                if input_str.isdigit():
                    print("Please enter full coordinates.\n")
                    continue
                action = [int(index) for index in input_str.split(",")]
            except ValueError:
                print("The input format is incorrect. Please try again.\n")
                continue

            # Execute.
            if not board.step(action):
                print("Cannot move here. Please try again.\n")
                continue

            print("人类选手 {0} 落子于 ({1}, {2})\nHuman player {0} moves ({1}, {2})\n".format(self.name, action[0], action[1]))
            break
Example #17
 def rollout(self, board: Board):
     while True:
         is_over, winner = board.result()
         if is_over:
             break
         self.rollout_policy(board)
     return winner
Example #18
def main():
    # About 920 games for epsilon to decay to its minimum.
    numberOfGames = 1000000
    iterable = None
    try:
        from tqdm import trange
        iterable = trange(numberOfGames)
    except ImportError:
        iterable = range(numberOfGames)

    outputDir = 'DQNAgent/model_output/'
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    model = Model()

    agent1 = DQNAgent(model)
    agent2 = DQNAgent(model)
    for gameNumber in iterable:
        game = DQNGame(Board(7,7), agent1, agent2)
        moves, boards, winningSide = game.play()
        
        # extract state, move, nextState, done
        memory = getStateActionNextStateReward(boards, moves, winningSide)
        model.rememeber(memory)

        if (len(model.memory) > model.batchSize):
            model.replay()

        if gameNumber % 50 == 0:
            print("weights_" + '{:04d}:'.format(gameNumber), end="")
            benchmarkAgainstRandomPlayer(model)
            print()

            model.save(outputDir + "weights_" + '{:04d}'.format(gameNumber) + ".hdf5")
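The comment about roughly 920 games for epsilon to decay to its minimum is consistent with the common DQN defaults of a 1.0 starting epsilon, a 0.01 floor, and a 0.995 multiplicative decay per game; those three numbers are assumptions here, not values read from Model. Solving 0.995^n = 0.01 gives n of about 919.

import math

epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.995   # assumed defaults, not from Model
games_to_min = math.ceil(math.log(epsilon_min / epsilon) / math.log(epsilon_decay))
print(games_to_min)   # 919, matching the "~920 games" note above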
Example #19
    def __init__(self, board=None, food=None, moves_threshold=1000):
        # Board
        if board is None:
            board = Board()
        elif isinstance(board, Board):
            board = Board(board)
        self.board = board

        # Food
        self.food = food

        # Moves threshold
        self.moves_threshold = moves_threshold

        # Initialize position
        self.move([0, 0])
Example #20
    def processMoves(self):
        self.board = Board(self.boardDimension, self.players, self.items)

        movesQueue = deque(self.moves)
        if (movesQueue.popleft() != "GAME-START"):
            # TODO Handle invalid entry error
            print("Invalid moves file")

        while len(movesQueue) > 0:
            move = movesQueue.popleft()
            # for move in iter(movesQueue.popleft, None):
            if (move == "GAME-END"):
                print("GAME-END... Game has finished")
                print(self.board.toStringDeaths(self.players))
            else:
                self.readMove(move)
Example #21
def play_web_game(is_stop, player1: Player, player2: Player, turn_to, send_board_step,
                  send_player1_running, send_player2_running, wait_human_action, game_over):
    board = Board()
    while not is_stop():
        turn_to(board.current_player)

        if board.current_player == BOARD.o:
            if isinstance(player1, Human):
                action = wait_human_action(1, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                action = player1.take_action(board, is_output_action=False,
                                             running_output_function=send_player1_running, is_stop=is_stop)
            send_board_step(1, action)
        else:
            if isinstance(player2, Human):
                action = wait_human_action(2, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                action = player2.take_action(board, is_output_action=False,
                                             running_output_function=send_player2_running, is_stop=is_stop)
            send_board_step(2, action)

        is_over, winner = board.result()
        if is_over:
            game_over(winner)
            return
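Every blocking call in this loop threads the same is_stop callable through so the web layer can cancel a game mid-search. One simple way to supply it is a threading.Event, whose bound is_set method already has the right zero-argument signature; this is a usage sketch, not the project's actual plumbing.

import threading

stop_event = threading.Event()

# Passed into play_web_game(...) as its is_stop argument.
is_stop = stop_event.is_set

# Later, e.g. from a disconnect handler on the web side:
stop_event.set()   # every is_stop() check now returns True and the loop unwinds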
Example #22
    def take_action(self, board: Board, is_output_action=True, running_output_function=None, is_stop=None):
        """

        :param board:
        :param is_output_action:
        :param running_output_function:
        :param is_stop:
        :return:
        """
        if is_output_action:
            print("{0} It's turn to {0}, AI player.".format(self.name))
            print("Thinking...")

        self.reset()
        self.run(board, self.search_times, running_output_function, is_stop=is_stop)

        actions, probs = self.get_action_probs()

        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])

        if self.is_training:
            flatten_action = np.random.choice(flatten_actions,
                                              p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        else:
            flatten_action = np.random.choice(flatten_actions, p=probs)

        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)

        board.step(action)

        if self.is_output_analysis:
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
            # probs -> action_probs
            for one_action, one_prob in zip(actions, probs):
                action_probs[one_action[0], one_action[1]] = one_prob

            self.output_analysis(action_probs)

        if is_output_action:
            print("IA {0} ({1}, {2})\nAI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))

        return action
Example #23
def start_until_game_over(player1: Player,
                          player2: Player,
                          board_renderer: BoardRenderer = None):
    """
    玩家 player1 和玩家 player2 在 board 上进行游戏直到游戏结束,并输出获胜者。
    Player player1 and player2 play on the board until the game is over, and output the winner.
    :param player1: 玩家 1。 Player 1.
    :param player2: 玩家 2。 Player 2.
    :param board_renderer: 棋盘渲染器。 The board renderer.
    :return: <int> board 返回的获胜者。 The winner returned by board.
    """
    board = Board()
    while True:
        # Render.
        if board_renderer is not None:
            board.render(board_renderer)

        # Take action.
        if board.current_player == BOARD.o:
            player1.take_action(board,
                                is_output_action=board_renderer is not None)
        else:
            player2.take_action(board,
                                is_output_action=board_renderer is not None)

        # Game over?
        is_over, winner = board.result()
        if is_over:
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
def start_until_game_over(player1: Player,
                          player2: Player,
                          board_renderer: BoardRenderer = None):
    """
    Player player1 and player2 play on the board until the game is over, and output the winner.
    :param player1: Player 1.
    :param player2: Player 2.
    :param board_renderer: The board renderer.
    :return: <int> board The winner returned by board.
    """
    board = Board()
    while True:
        # Render.
        if board_renderer is not None:
            board.render(board_renderer)

        # Take action.
        if board.current_player == BOARD.o:
            player1.take_action(board,
                                is_output_action=board_renderer is not None)
        else:
            player2.take_action(board,
                                is_output_action=board_renderer is not None)

        # Game over?
        is_over, winner = board.result()
        if is_over:
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
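A typical way to use start_until_game_over is to pit two players against each other for a batch of silent games and tally the winners; here is a usage sketch (the MCTSPlayer name in the comment is an assumption about the player classes above).

from collections import Counter


def evaluate(player1, player2, n_games=20):
    """Play n_games without rendering and count results by winner id (sketch)."""
    results = Counter()
    for _ in range(n_games):
        winner = start_until_game_over(player1, player2)   # board_renderer=None keeps it quiet
        results[winner] += 1
    return results


# e.g. evaluate(MCTSPlayer(search_times=2000), MCTSPlayer(search_times=500))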
Example #25
    def traverse(self, node: TreeNode, board: Board):
        while True:
            if len(node.children) == 0:
                break
            action, node = node.choose_best_child(c=self.greedy_value)
            board.step(action)

        is_over, _ = board.result()
        if is_over:
            return node

        actions = board.available_actions
        probs = np.ones(len(actions)) / len(actions)

        for action, prob in zip(actions, probs):
            _ = node.expand(action, prob)

        return node
    def handleEvents(self, events):
        super(GameOverScene, self).handleEvents(events)

        for event in events:
            if event.type == pygame.QUIT:
                exit()

            if event.type == pygame.KEYDOWN:

                if event.key == pygame.K_ESCAPE:
                    exit()

                if event.key == pygame.K_F4:
                    exit()

                if event.key == pygame.K_F1:
                    Board.newboard(self)
                    self.getGame().changeScene(GameConstants.MENU_SCENE)
Example #27
    def take_action(self, board: Board, is_output_action=True):
        """
        下一步 AI 玩家执行动作。
        The AI player take action next step.
        :param board: 当前棋盘。 Current board.
        :param is_output_action: 是否输出执行动作。 Whether to output execution actions.
        """
        if is_output_action:
            print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(self.name))
            print("思考中。。。 Thinking...")

        self.reset()
        self.run(board, self.search_times)

        # Get actions and probabilities.
        actions, probs = self.get_action_probs()

        # action -> flatten_action
        flatten_actions = []
        for one_action in actions:
            flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])

        if self.is_training:
            # Add Dirichlet noise for exploration during training.
            flatten_action = np.random.choice(flatten_actions,
                                              p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
        else:
            flatten_action = np.random.choice(flatten_actions, p=probs)

        # flatten_action -> action
        action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)

        board.step(action)

        if self.is_output_analysis:
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))
            # probs -> action_probs
            for one_action, one_prob in zip(actions, probs):
                action_probs[one_action[0], one_action[1]] = one_prob

            self.output_analysis(action_probs)

        if is_output_action:
            print("AI 选手 {0} 落子于 ({1}, {2})\nAI player {0} moves ({1}, {2})".format(self.name, action[0], action[1]))
    def take_action(self, board: Board):
        """
        下一步 AI 玩家执行动作。
        The AI player take action next step.
        :param board: 当前棋盘。 Current board.
        """
        print("该 {0} 落子了,它是 AI 选手。 It's turn to {0}, AI player.".format(
            self.name))
        print("思考中。。。 Thinking...")

        self.reset()
        self.run(board, self.search_times)
        action, _ = self.root.choose_best_child(0)
        board.step(action)

        if self.is_output_analysis:
            self.output_analysis()

        print("AI player {0} moves to ({1}, {2})".format(
            self.name, action[0], action[1]))
Example #29
 def rollout(self, board: Board):
     """
     Simulation.
     :param board: The board.
     :return: winner<int> winner.
     """
     while True:
         is_over, winner = board.result()
         if is_over:
             break
         # Decision making next step.
         self.rollout_policy(board)
     return winner
    def traverse(self, node: TreeNode, board: Board):
        """
        扩展子节点。
        Expand node.
        :param node: 当前节点。 Current node.
        :param board:
        :return: <TreeNode> 扩展出的节点。 Expanded nodes.
        """
        while True:
            is_over, _ = board.result()
            if is_over:
                break
            if len(node.children) != 0:
                action, node = node.choose_best_child(c=5.0)
                board.step(action)
            else:
                actions = board.available_actions
                probs = np.ones(len(actions)) / len(actions)

                # Expand all child nodes.
                for action, prob in zip(actions, probs):
                    _ = node.expand(action, prob)
                break
        return node, board