Python Gomoku.get_legal_moves 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: library

클래스/타입: Gomoku

메소드/함수: get_legal_moves

hotexamples.com에서의 예제들: 2

Python Gomoku.get_legal_moves - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 library.Gomoku.get_legal_moves에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Gomoku(7)

execute_move(7)

get_game_status(5)

get_current_color(3)

get_last_move(3)

get_board(2)

get_legal_moves(2)

display(1)

예제 #1

파일 보기

파일: learner.py 프로젝트: BalanceWing/alpha-zero-gomoku

    def self_play(self, first_color):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        train_examples. The game is played till the game ends. After the game
        ends, the outcome of the game is used to assign values to each example
        in train_examples.
        """

        train_examples = []
        gomoku = Gomoku(self.n, self.n_in_row, first_color)
        mcts = MCTS("./models/checkpoint.pt", self.thread_pool_size,
                    self.c_puct, self.num_mcts_sims, self.c_virtual_loss,
                    self.action_size, self.mcts_use_gpu)

        episode_step = 0
        while True:
            episode_step += 1

            # prob
            temp = self.temp if episode_step <= self.explore_num else 0
            prob = np.array(list(mcts.get_action_probs(gomoku, temp)))

            # generate sample
            board = tuple_2d_to_numpy_2d(gomoku.get_board())
            last_action = gomoku.get_last_move()
            cur_player = gomoku.get_current_color()

            sym = self.get_symmetries(board, prob)
            for b, p in sym:
                train_examples.append([b, last_action, cur_player, p])

            # dirichlet noise
            legal_moves = list(gomoku.get_legal_moves())
            noise = 0.25 * np.random.dirichlet(
                self.dirichlet_alpha * np.ones(np.count_nonzero(legal_moves)))

            prob_noise = 0.75 * prob
            j = 0
            for i in range(len(prob_noise)):
                if legal_moves[i] == 1:
                    prob_noise[i] += noise[j]
                    j += 1
            prob_noise /= np.sum(prob_noise)
            action = np.random.choice(len(prob_noise), p=prob_noise)

            # execute move
            gomoku.execute_move(action)
            mcts.update_with_move(action)

            # is ended
            ended, winner = gomoku.get_game_status()
            if ended == 1:
                # b, last_action, cur_player, p, v
                return [(x[0], x[1], x[2], x[3], x[2] * winner)
                        for x in train_examples]

예제 #2

파일 보기

    def self_play(self, first_color, libtorch, show):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        train_examples. The game is played till the game ends. After the game
        ends, the outcome of the game is used to assign values to each example
        in train_examples.
        """
        train_examples = []

        player1 = MCTS(libtorch, self.num_mcts_threads, self.c_puct,
                       self.num_mcts_sims, self.c_virtual_loss,
                       self.action_size)
        player2 = MCTS(libtorch, self.num_mcts_threads, self.c_puct,
                       self.num_mcts_sims, self.c_virtual_loss,
                       self.action_size)
        players = [player2, None, player1]
        player_index = 1

        gomoku = Gomoku(self.n, self.n_in_row, first_color)

        if show:
            self.gomoku_gui.reset_status()

        episode_step = 0
        while True:
            episode_step += 1
            player = players[player_index + 1]

            # get action prob
            if episode_step <= self.num_explore:
                prob = np.array(
                    list(player.get_action_probs(gomoku, self.temp)))
            else:
                prob = np.array(list(player.get_action_probs(gomoku, 0)))

            # generate sample
            board = tuple_2d_to_numpy_2d(gomoku.get_board())
            last_action = gomoku.get_last_move()
            cur_player = gomoku.get_current_color()

            sym = self.get_symmetries(board, prob, last_action)
            for b, p, a in sym:
                train_examples.append([b, a, cur_player, p])

            # dirichlet noise
            legal_moves = list(gomoku.get_legal_moves())
            noise = 0.1 * np.random.dirichlet(
                self.dirichlet_alpha * np.ones(np.count_nonzero(legal_moves)))

            prob = 0.9 * prob
            j = 0
            for i in range(len(prob)):
                if legal_moves[i] == 1:
                    prob[i] += noise[j]
                    j += 1
            prob /= np.sum(prob)

            # execute move
            action = np.random.choice(len(prob), p=prob)

            if show:
                self.gomoku_gui.execute_move(cur_player, action)
            gomoku.execute_move(action)
            player1.update_with_move(action)
            player2.update_with_move(action)

            # next player
            player_index = -player_index

            # is ended
            ended, winner = gomoku.get_game_status()
            if ended == 1:
                # b, last_action, cur_player, p, v
                return [(x[0], x[1], x[2], x[3], x[2] * winner)
                        for x in train_examples]